
TOMOYO Linux Cross Reference
Linux/mm/swapfile.c


Diff markup

Differences between /mm/swapfile.c (Version linux-6.12-rc7) and /mm/swapfile.c (Version linux-4.12.14)

Markup: the left number column is linux-6.12-rc7, the right number column is linux-4.12.14.
"<<" marks lines present only in linux-6.12-rc7, ">>" lines present only in linux-4.12.14,
and "!!" changed lines (the new text is listed first, the old text directly below it).


   1        <<  // SPDX-License-Identifier: GPL-2.0-only
   2    1       /*
   3    2        *  linux/mm/swapfile.c
   4    3        *
   5    4        *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   6    5        *  Swap reorganised 29.12.95, Stephen Tweedie
   7    6        */
   8    7       
   9        <<  #include <linux/blkdev.h>
  10    8       #include <linux/mm.h>
  11    9       #include <linux/sched/mm.h>
  12   10       #include <linux/sched/task.h>
  13   11       #include <linux/hugetlb.h>
  14   12       #include <linux/mman.h>
  15   13       #include <linux/slab.h>
  16   14       #include <linux/kernel_stat.h>
  17   15       #include <linux/swap.h>
  18   16       #include <linux/vmalloc.h>
  19   17       #include <linux/pagemap.h>
  20   18       #include <linux/namei.h>
  21   19       #include <linux/shmem_fs.h>
  22        !!  #include <linux/blk-cgroup.h>
       20   !!  #include <linux/blkdev.h>
  23   21       #include <linux/random.h>
  24   22       #include <linux/writeback.h>
  25   23       #include <linux/proc_fs.h>
  26   24       #include <linux/seq_file.h>
  27   25       #include <linux/init.h>
  28   26       #include <linux/ksm.h>
  29   27       #include <linux/rmap.h>
  30   28       #include <linux/security.h>
  31   29       #include <linux/backing-dev.h>
  32   30       #include <linux/mutex.h>
  33   31       #include <linux/capability.h>
  34   32       #include <linux/syscalls.h>
  35   33       #include <linux/memcontrol.h>
  36   34       #include <linux/poll.h>
  37   35       #include <linux/oom.h>
       36   >>  #include <linux/frontswap.h>
  38   37       #include <linux/swapfile.h>
  39   38       #include <linux/export.h>
  40   39       #include <linux/swap_slots.h>
  41        <<  #include <linux/sort.h>
  42        <<  #include <linux/completion.h>
  43        <<  #include <linux/suspend.h>
  44        <<  #include <linux/zswap.h>
  45        <<  #include <linux/plist.h>
  46   40       
       41   >>  #include <asm/pgtable.h>
  47   42       #include <asm/tlbflush.h>
  48   43       #include <linux/swapops.h>
  49   44       #include <linux/swap_cgroup.h>
  50        <<  #include "internal.h"
  51        <<  #include "swap.h"
  52   45       
  53   46       static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
  54   47                                        unsigned char);
  55   48       static void free_swap_count_continuations(struct swap_info_struct *);
  56        !!  static void swap_entry_range_free(struct swap_info_struct *si, swp_entry_t entry,
  57        !!                                    unsigned int nr_pages);
       49   !!  static sector_t map_swap_entry(swp_entry_t, struct block_device**);
  58        <<  static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
  59        <<                               unsigned int nr_entries);
  60        <<  static bool folio_swapcache_freeable(struct folio *folio);
  61        <<  static struct swap_cluster_info *lock_cluster_or_swap_info(
  62        <<                  struct swap_info_struct *si, unsigned long offset);
  63        <<  static void unlock_cluster_or_swap_info(struct swap_info_struct *si,
  64        <<                                          struct swap_cluster_info *ci);
  65   50       
  66        !!  static DEFINE_SPINLOCK(swap_lock);
       51   !!  DEFINE_SPINLOCK(swap_lock);
  67   52       static unsigned int nr_swapfiles;
  68   53       atomic_long_t nr_swap_pages;
  69   54       /*
  70   55        * Some modules use swappable objects and may try to swap them out under
  71   56        * memory pressure (via the shrinker). Before doing so, they may wish to
  72   57        * check to see if any swap space is available.
  73   58        */
  74   59       EXPORT_SYMBOL_GPL(nr_swap_pages);
  75   60       /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
  76   61       long total_swap_pages;
  77        !!  static int least_priority = -1;
       62   !!  static int least_priority;
  78        <<  unsigned long swapfile_maximum_size;
  79        <<  #ifdef CONFIG_MIGRATION
  80        <<  bool swap_migration_ad_supported;
  81        <<  #endif  /* CONFIG_MIGRATION */
  82   63       
  83   64       static const char Bad_file[] = "Bad swap file entry ";
  84   65       static const char Unused_file[] = "Unused swap file entry ";
  85   66       static const char Bad_offset[] = "Bad swap offset entry ";
  86   67       static const char Unused_offset[] = "Unused swap offset entry ";
  87   68       
  88   69       /*
  89   70        * all active swap_info_structs
  90   71        * protected with swap_lock, and ordered by priority.
  91   72        */
  92        !!  static PLIST_HEAD(swap_active_head);
       73   !!  PLIST_HEAD(swap_active_head);
  93   74       
  94   75       /*
  95   76        * all available (active, not full) swap_info_structs
  96   77        * protected with swap_avail_lock, ordered by priority.
  97        !!   * This is used by folio_alloc_swap() instead of swap_active_head
       78   !!   * This is used by get_swap_page() instead of swap_active_head
  98   79        * because swap_active_head includes all swap_info_structs,
  99        !!   * but folio_alloc_swap() doesn't need to look at full ones.
       80   !!   * but get_swap_page() doesn't need to look at full ones.
 100   81        * This uses its own lock instead of swap_lock because when a
 101   82        * swap_info_struct changes between not-full/full, it needs to
 102   83        * add/remove itself to/from this list, but the swap_info_struct->lock
 103   84        * is held and the locking order requires swap_lock to be taken
 104   85        * before any swap_info_struct->lock.
 105   86        */
 106        !!  static struct plist_head *swap_avail_heads;
       87   !!  static PLIST_HEAD(swap_avail_head);
 107   88       static DEFINE_SPINLOCK(swap_avail_lock);
 108   89       
 109        !!  static struct swap_info_struct *swap_info[MAX_SWAPFILES];
       90   !!  struct swap_info_struct *swap_info[MAX_SWAPFILES];
 110   91       
 111   92       static DEFINE_MUTEX(swapon_mutex);
 112   93       
 113   94       static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait);
 114   95       /* Activity counter to indicate that a swapon or swapoff has occurred */
 115   96       static atomic_t proc_poll_event = ATOMIC_INIT(0);
 116   97       
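The comment above swap_avail_heads documents a lock-ordering rule: swap_lock is always taken before any swap_info_struct->lock, so a path that already holds si->lock must not take swap_lock, and the avail list therefore carries its own lock that nests safely inside si->lock. A minimal userspace sketch of that rule, assuming nothing beyond POSIX threads (global_lock, per_device_lock and avail_lock are invented stand-ins, not kernel API):

    #include <pthread.h>

    static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;     /* plays swap_lock */
    static pthread_mutex_t per_device_lock = PTHREAD_MUTEX_INITIALIZER; /* plays si->lock */
    static pthread_mutex_t avail_lock = PTHREAD_MUTEX_INITIALIZER;      /* plays swap_avail_lock */

    static int device_full;

    /* Legal order: the global lock may be taken around the per-device lock. */
    static void swapoff_like_path(void)
    {
            pthread_mutex_lock(&global_lock);
            pthread_mutex_lock(&per_device_lock);
            device_full = 0;
            pthread_mutex_unlock(&per_device_lock);
            pthread_mutex_unlock(&global_lock);
    }

    /*
     * A path that already holds the per-device lock must not take the global
     * lock (order inversion, deadlock risk); the independent avail_lock can
     * safely nest here instead, which is exactly why it exists.
     */
    static void mark_device_full(void)
    {
            pthread_mutex_lock(&per_device_lock);
            pthread_mutex_lock(&avail_lock);
            device_full = 1;        /* stands in for plist_del() from the avail list */
            pthread_mutex_unlock(&avail_lock);
            pthread_mutex_unlock(&per_device_lock);
    }

    int main(void)
    {
            swapoff_like_path();
            mark_device_full();
            return 0;
    }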
 117        <<  atomic_t nr_rotate_swap = ATOMIC_INIT(0);
 118        <<  
 119        <<  static struct swap_info_struct *swap_type_to_swap_info(int type)
 120        <<  {
 121        <<          if (type >= MAX_SWAPFILES)
 122        <<                  return NULL;
 123        <<  
 124        <<          return READ_ONCE(swap_info[type]); /* rcu_dereference() */
 125        <<  }
 126        <<  
 127   98       static inline unsigned char swap_count(unsigned char ent)
 128   99       {
 129        !!          return ent & ~SWAP_HAS_CACHE;   /* may include COUNT_CONTINUED flag */
      100   !!          return ent & ~SWAP_HAS_CACHE;   /* may include SWAP_HAS_CONT flag */
 130        <<  }
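swap_count() strips the swap-cache bit out of one swap_map byte: the low bits hold the map count, SWAP_HAS_CACHE marks a swapcache copy, and COUNT_CONTINUED marks a count that overflows into a continuation page. A standalone model, assuming the mainline bit values from include/linux/swap.h (0x40 and 0x80):

    #include <stdio.h>

    #define SWAP_HAS_CACHE  0x40    /* values as in include/linux/swap.h */
    #define COUNT_CONTINUED 0x80

    /* Same masking as the kernel helper above. */
    static unsigned char swap_count(unsigned char ent)
    {
            return ent & ~SWAP_HAS_CACHE;   /* may still include COUNT_CONTINUED */
    }

    int main(void)
    {
            unsigned char ent = 3 | SWAP_HAS_CACHE; /* three mappings + a swapcache copy */
            printf("count=%u cached=%d\n", swap_count(ent), !!(ent & SWAP_HAS_CACHE));
            return 0;                               /* prints: count=3 cached=1 */
    }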
 131        <<  
 132        <<  /* Reclaim the swap entry anyway if possible */
 133        <<  #define TTRS_ANYWAY             0x1
 134        <<  /*
 135        <<   * Reclaim the swap entry if there are no more mappings of the
 136        <<   * corresponding page
 137        <<   */
 138        <<  #define TTRS_UNMAPPED           0x2
 139        <<  /* Reclaim the swap entry if swap is getting full */
 140        <<  #define TTRS_FULL               0x4
 141        <<  /* Reclaim directly, bypass the slot cache and don't touch device lock */
 142        <<  #define TTRS_DIRECT             0x8
 143        <<  
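__try_to_reclaim_swap() below ORs these TTRS_* conditions together. A hedged sketch of that predicate with the folio and memcg checks stubbed out (the *_stub names are invented for the example):

    #include <stdbool.h>
    #include <stdio.h>

    #define TTRS_ANYWAY     0x1
    #define TTRS_UNMAPPED   0x2
    #define TTRS_FULL       0x4

    static bool folio_mapped_stub(void) { return false; } /* no mappings remain */
    static bool swap_full_stub(void)    { return true;  } /* swap is under pressure */

    static bool need_reclaim(unsigned long flags)
    {
            return (flags & TTRS_ANYWAY) ||
                   ((flags & TTRS_UNMAPPED) && !folio_mapped_stub()) ||
                   ((flags & TTRS_FULL) && swap_full_stub());
    }

    int main(void)
    {
            printf("%d %d\n", need_reclaim(TTRS_UNMAPPED), need_reclaim(0)); /* 1 0 */
            return 0;
    }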
 144        <<  static bool swap_is_has_cache(struct swap_info_struct *si,
 145        <<                                unsigned long offset, int nr_pages)
 146        <<  {
 147        <<          unsigned char *map = si->swap_map + offset;
 148        <<          unsigned char *map_end = map + nr_pages;
 149        <<  
 150        <<          do {
 151        <<                  VM_BUG_ON(!(*map & SWAP_HAS_CACHE));
 152        <<                  if (*map != SWAP_HAS_CACHE)
 153        <<                          return false;
 154        <<          } while (++map < map_end);
 155        <<  
 156        <<          return true;
 157        <<  }
 158        <<  
 159        <<  static bool swap_is_last_map(struct swap_info_struct *si,
 160        <<                  unsigned long offset, int nr_pages, bool *has_cache)
 161        <<  {
 162        <<          unsigned char *map = si->swap_map + offset;
 163        <<          unsigned char *map_end = map + nr_pages;
 164        <<          unsigned char count = *map;
 165        <<  
 166        <<          if (swap_count(count) != 1)
 167        <<                  return false;
 168        <<  
 169        <<          while (++map < map_end) {
 170        <<                  if (*map != count)
 171        <<                          return false;
 172        <<          }
 173        <<  
 174        <<          *has_cache = !!(count & SWAP_HAS_CACHE);
 175        <<          return true;
 176  101       }
 177  102       
 178        !!  /*
 179        !!   * returns number of pages in the folio that backs the swap entry. If positive,
 180        !!   * the folio was reclaimed. If negative, the folio was not reclaimed. If 0, no
 181        !!   * folio was associated with the swap entry.
 182        !!   */
 183        !!  static int __try_to_reclaim_swap(struct swap_info_struct *si,
 184        !!                                   unsigned long offset, unsigned long flags)
      103   !!  /* returns 1 if swap entry is freed */
      104   !!  static int
      105   !!  __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
 185  106       {
 186  107               swp_entry_t entry = swp_entry(si->type, offset);
 187        !!          struct address_space *address_space = swap_address_space(entry);
 188        <<          struct swap_cluster_info *ci;
 189        <<          struct folio *folio;
 190        <<          int ret, nr_pages;
 191        <<          bool need_reclaim;
      108   !!          struct page *page;
      109   !!          int ret = 0;
 192  110       
 193        !!          folio = filemap_get_folio(address_space, swap_cache_index(entry));
 194        !!          if (IS_ERR(folio))
      111   !!          page = find_get_page(swap_address_space(entry), swp_offset(entry));
      112   !!          if (!page)
 195  113                       return 0;
 196        <<  
 197        <<          nr_pages = folio_nr_pages(folio);
 198        <<          ret = -nr_pages;
 199        <<  
 200  114               /*
 201        !!           * When this function is called from scan_swap_map_slots() and it's
 202        !!           * called by vmscan.c at reclaiming folios. So, we hold a folio lock
 203        !!           * here. We have to use trylock for avoiding deadlock. This is a special
 204        !!           * case and you should use folio_free_swap() with explicit folio_lock()
      115   !!           * This function is called from scan_swap_map() and it's called
      116   !!           * by vmscan.c at reclaiming pages. So, we hold a lock on a page, here.
      117   !!           * We have to use trylock for avoiding deadlock. This is a special
      118   !!           * case and you should use try_to_free_swap() with explicit lock_page()
 205  119                * in usual operations.
 206  120                */
 207        !!          if (!folio_trylock(folio))
 208        !!                  goto out;
      121   !!          if (trylock_page(page)) {
      122   !!                  ret = try_to_free_swap(page);
      123   !!                  unlock_page(page);
      124   !!          }
      125   !!          put_page(page);
 209        <<  
 210        <<          /* offset could point to the middle of a large folio */
 211        <<          entry = folio->swap;
 212        <<          offset = swp_offset(entry);
 213        <<  
 214        <<          need_reclaim = ((flags & TTRS_ANYWAY) ||
 215        <<                          ((flags & TTRS_UNMAPPED) && !folio_mapped(folio)) ||
 216        <<                          ((flags & TTRS_FULL) && mem_cgroup_swap_full(folio)));
 217        <<          if (!need_reclaim || !folio_swapcache_freeable(folio))
 218        <<                  goto out_unlock;
 219        <<  
 220        <<          /*
 221        <<           * It's safe to delete the folio from swap cache only if the folio's
 222        <<           * swap_map is HAS_CACHE only, which means the slots have no page table
 223        <<           * reference or pending writeback, and can't be allocated to others.
 224        <<           */
 225        <<          ci = lock_cluster_or_swap_info(si, offset);
 226        <<          need_reclaim = swap_is_has_cache(si, offset, nr_pages);
 227        <<          unlock_cluster_or_swap_info(si, ci);
 228        <<          if (!need_reclaim)
 229        <<                  goto out_unlock;
 230        <<  
 231        <<          if (!(flags & TTRS_DIRECT)) {
 232        <<                  /* Free through slot cache */
 233        <<                  delete_from_swap_cache(folio);
 234        <<                  folio_set_dirty(folio);
 235        <<                  ret = nr_pages;
 236        <<                  goto out_unlock;
 237        <<          }
 238        <<  
 239        <<          xa_lock_irq(&address_space->i_pages);
 240        <<          __delete_from_swap_cache(folio, entry, NULL);
 241        <<          xa_unlock_irq(&address_space->i_pages);
 242        <<          folio_ref_sub(folio, nr_pages);
 243        <<          folio_set_dirty(folio);
 244        <<  
 245        <<          spin_lock(&si->lock);
 246        <<          /* Only simple page folio can be backed by zswap */
 247        <<          if (nr_pages == 1)
 248        <<                  zswap_invalidate(entry);
 249        <<          swap_entry_range_free(si, entry, nr_pages);
 250        <<          spin_unlock(&si->lock);
 251        <<          ret = nr_pages;
 252        <<  out_unlock:
 253        <<          folio_unlock(folio);
 254        <<  out:
 255        <<          folio_put(folio);
 256  126               return ret;
 257  127       }
 258  128       
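The return convention of __try_to_reclaim_swap() (positive: that many pages reclaimed; negative: a folio exists but was not reclaimed; zero: no folio at all) is worth pinning down. A tiny standalone interpreter with made-up sample values:

    #include <stdio.h>

    static const char *describe(int ret)
    {
            if (ret > 0)
                    return "reclaimed";                     /* ret pages freed */
            if (ret < 0)
                    return "folio present, not reclaimed";  /* -ret pages, busy */
            return "no folio for this entry";
    }

    int main(void)
    {
            int samples[] = { 4, -4, 0 };                   /* invented values */
            for (int i = 0; i < 3; i++)
                    printf("%2d: %s\n", samples[i], describe(samples[i]));
            return 0;
    }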
 259        <<  static inline struct swap_extent *first_se(struct swap_info_struct *sis)
 260        <<  {
 261        <<          struct rb_node *rb = rb_first(&sis->swap_extent_root);
 262        <<          return rb_entry(rb, struct swap_extent, rb_node);
 263        <<  }
 264        <<  
 265        <<  static inline struct swap_extent *next_se(struct swap_extent *se)
 266        <<  {
 267        <<          struct rb_node *rb = rb_next(&se->rb_node);
 268        <<          return rb ? rb_entry(rb, struct swap_extent, rb_node) : NULL;
 269        <<  }
 270        <<  
 271  129       /*
 272  130        * swapon tell device that all the old swap contents can be discarded,
 273  131        * to allow the swap device to optimize its wear-levelling.
 274  132        */
 275  133       static int discard_swap(struct swap_info_struct *si)
 276  134       {
 277  135               struct swap_extent *se;
 278  136               sector_t start_block;
 279  137               sector_t nr_blocks;
 280  138               int err = 0;
 281  139       
 282  140               /* Do not discard the swap header page! */
 283        !!          se = first_se(si);
      141   !!          se = &si->first_swap_extent;
 284  142               start_block = (se->start_block + 1) << (PAGE_SHIFT - 9);
 285  143               nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
 286  144               if (nr_blocks) {
 287  145                       err = blkdev_issue_discard(si->bdev, start_block,
 288        !!                                  nr_blocks, GFP_KERNEL);
      146   !!                                  nr_blocks, GFP_KERNEL, 0);
 289  147                       if (err)
 290  148                               return err;
 291  149                       cond_resched();
 292  150               }
 293  151       
 294        !!          for (se = next_se(se); se; se = next_se(se)) {
      152   !!          list_for_each_entry(se, &si->first_swap_extent.list, list) {
 295  153                       start_block = se->start_block << (PAGE_SHIFT - 9);
 296  154                       nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
 297  155       
 298  156                       err = blkdev_issue_discard(si->bdev, start_block,
 299        !!                                  nr_blocks, GFP_KERNEL);
      157   !!                                  nr_blocks, GFP_KERNEL, 0);
 300  158                       if (err)
 301  159                               break;
 302  160       
 303  161                       cond_resched();
 304  162               }
 305  163               return err;             /* That will often be -EOPNOTSUPP */
 306  164       }
 307  165       
 308        <<  static struct swap_extent *
 309        <<  offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset)
 310        <<  {
 311        <<          struct swap_extent *se;
 312        <<          struct rb_node *rb;
 313        <<  
 314        <<          rb = sis->swap_extent_root.rb_node;
 315        <<          while (rb) {
 316        <<                  se = rb_entry(rb, struct swap_extent, rb_node);
 317        <<                  if (offset < se->start_page)
 318        <<                          rb = rb->rb_left;
 319        <<                  else if (offset >= se->start_page + se->nr_pages)
 320        <<                          rb = rb->rb_right;
 321        <<                  else
 322        <<                          return se;
 323        <<          }
 324        <<          /* It *must* be present */
 325        <<          BUG();
 326        <<  }
 327        <<  
 328        <<  sector_t swap_folio_sector(struct folio *folio)
 329        <<  {
 330        <<          struct swap_info_struct *sis = swp_swap_info(folio->swap);
 331        <<          struct swap_extent *se;
 332        <<          sector_t sector;
 333        <<          pgoff_t offset;
 334        <<  
 335        <<          offset = swp_offset(folio->swap);
 336        <<          se = offset_to_swap_extent(sis, offset);
 337        <<          sector = se->start_block + (offset - se->start_page);
 338        <<          return sector << (PAGE_SHIFT - 9);
 339        <<  }
 340        <<  
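swap_folio_sector() above maps a swap offset to a 512-byte device sector through its extent: sector = (start_block + (offset - start_page)) << (PAGE_SHIFT - 9). A userspace model with invented extent values, assuming 4 KiB pages:

    #include <stdio.h>

    #define PAGE_SHIFT 12   /* 4 KiB pages; arch-dependent in the kernel */

    struct extent { unsigned long start_page, nr_pages, start_block; };

    static unsigned long long page_to_sector(const struct extent *se, unsigned long offset)
    {
            unsigned long long block = se->start_block + (offset - se->start_page);
            return block << (PAGE_SHIFT - 9);       /* 8 sectors per 4 KiB page */
    }

    int main(void)
    {
            struct extent se = { .start_page = 100, .nr_pages = 50, .start_block = 2048 };
            /* swap page 103 -> block 2051 -> sector 16408 */
            printf("%llu\n", page_to_sector(&se, 103));
            return 0;
    }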
 341  166       /*
 342  167        * swap allocation tell device that a cluster of swap can now be discarded,
 343  168        * to allow the swap device to optimize its wear-levelling.
 344  169        */
 345  170       static void discard_swap_cluster(struct swap_info_struct *si,
 346  171                                        pgoff_t start_page, pgoff_t nr_pages)
 347  172       {
 348        !!          struct swap_extent *se = offset_to_swap_extent(si, start_page);
      173   !!          struct swap_extent *se = si->curr_swap_extent;
      174   >>          int found_extent = 0;
 349  175       
 350  176               while (nr_pages) {
 351        !!                  pgoff_t offset = start_page - se->start_page;
 352        !!                  sector_t start_block = se->start_block + offset;
 353        !!                  sector_t nr_blocks = se->nr_pages - offset;
 354        !!  
 355        !!                  if (nr_blocks > nr_pages)
 356        !!                          nr_blocks = nr_pages;
 357        !!                  start_page += nr_blocks;
 358        !!                  nr_pages -= nr_blocks;
 359        !!  
 360        !!                  start_block <<= PAGE_SHIFT - 9;
 361        !!                  nr_blocks <<= PAGE_SHIFT - 9;
 362        !!                  if (blkdev_issue_discard(si->bdev, start_block,
 363        !!                                          nr_blocks, GFP_NOIO))
 364        !!                          break;
      177   !!                  if (se->start_page <= start_page &&
      178   !!                      start_page < se->start_page + se->nr_pages) {
      179   !!                          pgoff_t offset = start_page - se->start_page;
      180   !!                          sector_t start_block = se->start_block + offset;
      181   !!                          sector_t nr_blocks = se->nr_pages - offset;
      182   !!  
      183   !!                          if (nr_blocks > nr_pages)
      184   !!                                  nr_blocks = nr_pages;
      185   !!                          start_page += nr_blocks;
      186   !!                          nr_pages -= nr_blocks;
      187   !!  
      188   !!                          if (!found_extent++)
      189   !!                                  si->curr_swap_extent = se;
      190   !!  
      191   !!                          start_block <<= PAGE_SHIFT - 9;
      192   !!                          nr_blocks <<= PAGE_SHIFT - 9;
      193   !!                          if (blkdev_issue_discard(si->bdev, start_block,
      194   !!                                      nr_blocks, GFP_NOIO, 0))
      195   !!                                  break;
      196   !!                  }
 365  197       
 366        !!                  se = next_se(se);
      198   !!                  se = list_next_entry(se, list);
 367  199               }
 368  200       }
 369  201       
 370        <<  #ifdef CONFIG_THP_SWAP
 371        <<  #define SWAPFILE_CLUSTER        HPAGE_PMD_NR
 372        <<  
 373        <<  #define swap_entry_order(order) (order)
 374        <<  #else
 375  202       #define SWAPFILE_CLUSTER        256
 376        <<  
 377        <<  /*
 378        <<   * Define swap_entry_order() as constant to let compiler to optimize
 379        <<   * out some code if !CONFIG_THP_SWAP
 380        <<   */
 381        <<  #define swap_entry_order(order) 0
 382        <<  #endif
 383  203       #define LATENCY_LIMIT           256
 384  204       
      205   >>  static inline void cluster_set_flag(struct swap_cluster_info *info,
      206   >>          unsigned int flag)
      207   >>  {
      208   >>          info->flags = flag;
      209   >>  }
      210   >>  
      211   >>  static inline unsigned int cluster_count(struct swap_cluster_info *info)
      212   >>  {
      213   >>          return info->data;
      214   >>  }
      215   >>  
      216   >>  static inline void cluster_set_count(struct swap_cluster_info *info,
      217   >>                                       unsigned int c)
      218   >>  {
      219   >>          info->data = c;
      220   >>  }
      221   >>  
      222   >>  static inline void cluster_set_count_flag(struct swap_cluster_info *info,
      223   >>                                           unsigned int c, unsigned int f)
      224   >>  {
      225   >>          info->flags = f;
      226   >>          info->data = c;
      227   >>  }
      228   >>  
      229   >>  static inline unsigned int cluster_next(struct swap_cluster_info *info)
      230   >>  {
      231   >>          return info->data;
      232   >>  }
      233   >>  
      234   >>  static inline void cluster_set_next(struct swap_cluster_info *info,
      235   >>                                      unsigned int n)
      236   >>  {
      237   >>          info->data = n;
      238   >>  }
      239   >>  
      240   >>  static inline void cluster_set_next_flag(struct swap_cluster_info *info,
      241   >>                                           unsigned int n, unsigned int f)
      242   >>  {
      243   >>          info->flags = f;
      244   >>          info->data = n;
      245   >>  }
      246   >>  
 385  247       static inline bool cluster_is_free(struct swap_cluster_info *info)
 386  248       {
 387  249               return info->flags & CLUSTER_FLAG_FREE;
 388  250       }
 389  251       
 390        !!  static inline unsigned int cluster_index(struct swap_info_struct *si,
      252   !!  static inline bool cluster_is_null(struct swap_cluster_info *info)
 391        <<                                           struct swap_cluster_info *ci)
 392  253       {
 393        !!          return ci - si->cluster_info;
      254   !!          return info->flags & CLUSTER_FLAG_NEXT_NULL;
 394  255       }
 395  256       
 396        !!  static inline unsigned int cluster_offset(struct swap_info_struct *si,
      257   !!  static inline void cluster_set_null(struct swap_cluster_info *info)
 397        <<                                            struct swap_cluster_info *ci)
 398  258       {
 399        !!          return cluster_index(si, ci) * SWAPFILE_CLUSTER;
      259   !!          info->flags = CLUSTER_FLAG_NEXT_NULL;
      260   >>          info->data = 0;
 400  261       }
 401  262       
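cluster_index()/cluster_offset() above are plain array arithmetic over cluster_info; the same mapping written out with indices, assuming the non-THP SWAPFILE_CLUSTER of 256 slots:

    #include <stdio.h>

    #define SWAPFILE_CLUSTER 256

    int main(void)
    {
            unsigned long offset = 1000;                    /* an arbitrary swap slot */
            unsigned long idx = offset / SWAPFILE_CLUSTER;  /* cluster 3 */
            unsigned long first = idx * SWAPFILE_CLUSTER;   /* slot 768 */
            printf("slot %lu -> cluster %lu (slots %lu..%lu)\n",
                   offset, idx, first, first + SWAPFILE_CLUSTER - 1);
            return 0;
    }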
 402  263       static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
 403  264                                                            unsigned long offset)
 404  265       {
 405  266               struct swap_cluster_info *ci;
 406  267       
 407  268               ci = si->cluster_info;
 408  269               if (ci) {
 409  270                       ci += offset / SWAPFILE_CLUSTER;
 410  271                       spin_lock(&ci->lock);
 411  272               }
 412  273               return ci;
 413  274       }
 414  275       
 415  276       static inline void unlock_cluster(struct swap_cluster_info *ci)
 416  277       {
 417  278               if (ci)
 418  279                       spin_unlock(&ci->lock);
 419  280       }
 420  281       
 421        <<  /*
 422        <<   * Determine the locking method in use for this device.  Return
 423        <<   * swap_cluster_info if SSD-style cluster-based locking is in place.
 424        <<   */
 425  282       static inline struct swap_cluster_info *lock_cluster_or_swap_info(
 426        !!                  struct swap_info_struct *si, unsigned long offset)
      283   !!          struct swap_info_struct *si,
      284   >>          unsigned long offset)
 427  285       {
 428  286               struct swap_cluster_info *ci;
 429  287       
 430        <<          /* Try to use fine-grained SSD-style locking if available: */
 431  288               ci = lock_cluster(si, offset);
 432        <<          /* Otherwise, fall back to traditional, coarse locking: */
 433  289               if (!ci)
 434  290                       spin_lock(&si->lock);
 435  291       
 436  292               return ci;
 437  293       }
 438  294       
 439  295       static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,
 440  296                                                      struct swap_cluster_info *ci)
 441  297       {
 442  298               if (ci)
 443  299                       unlock_cluster(ci);
 444  300               else
 445  301                       spin_unlock(&si->lock);
 446  302       }
 447  303       
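The pair above implements a fall-back locking idiom: prefer the fine-grained per-cluster lock when a cluster table exists (the SSD case), otherwise take the coarse device lock, and let the unlock side key off the returned pointer. A userspace analog using pthreads (types and names are illustrative only):

    #include <pthread.h>
    #include <stddef.h>

    struct cluster { pthread_mutex_t lock; };
    struct device  { struct cluster *clusters; pthread_mutex_t lock; };

    static struct cluster *lock_cluster_or_device(struct device *dev, unsigned long offset)
    {
            if (dev->clusters) {
                    struct cluster *ci = &dev->clusters[offset / 256];
                    pthread_mutex_lock(&ci->lock); /* fine-grained path */
                    return ci;
            }
            pthread_mutex_lock(&dev->lock);        /* coarse fallback */
            return NULL;
    }

    static void unlock_cluster_or_device(struct device *dev, struct cluster *ci)
    {
            if (ci)
                    pthread_mutex_unlock(&ci->lock);
            else
                    pthread_mutex_unlock(&dev->lock);
    }

    int main(void)
    {
            struct device hdd = { .clusters = NULL, .lock = PTHREAD_MUTEX_INITIALIZER };
            struct cluster *ci = lock_cluster_or_device(&hdd, 1000); /* NULL: coarse path */
            unlock_cluster_or_device(&hdd, ci);
            return 0;
    }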
      304   >>  static inline bool cluster_list_empty(struct swap_cluster_list *list)
      305   >>  {
      306   >>          return cluster_is_null(&list->head);
      307   >>  }
      308   >>  
      309   >>  static inline unsigned int cluster_list_first(struct swap_cluster_list *list)
      310   >>  {
      311   >>          return cluster_next(&list->head);
      312   >>  }
      313   >>  
      314   >>  static void cluster_list_init(struct swap_cluster_list *list)
      315   >>  {
      316   >>          cluster_set_null(&list->head);
      317   >>          cluster_set_null(&list->tail);
      318   >>  }
      319   >>  
      320   >>  static void cluster_list_add_tail(struct swap_cluster_list *list,
      321   >>                                    struct swap_cluster_info *ci,
      322   >>                                    unsigned int idx)
      323   >>  {
      324   >>          if (cluster_list_empty(list)) {
      325   >>                  cluster_set_next_flag(&list->head, idx, 0);
      326   >>                  cluster_set_next_flag(&list->tail, idx, 0);
      327   >>          } else {
      328   >>                  struct swap_cluster_info *ci_tail;
      329   >>                  unsigned int tail = cluster_next(&list->tail);
      330   >>  
      331   >>                  /*
      332   >>                   * Nested cluster lock, but both cluster locks are
      333   >>                   * only acquired when we held swap_info_struct->lock
      334   >>                   */
      335   >>                  ci_tail = ci + tail;
      336   >>                  spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING);
      337   >>                  cluster_set_next(ci_tail, idx);
      338   >>                  spin_unlock(&ci_tail->lock);
      339   >>                  cluster_set_next_flag(&list->tail, idx, 0);
      340   >>          }
      341   >>  }
      342   >>  
      343   >>  static unsigned int cluster_list_del_first(struct swap_cluster_list *list,
      344   >>                                             struct swap_cluster_info *ci)
      345   >>  {
      346   >>          unsigned int idx;
      347   >>  
      348   >>          idx = cluster_next(&list->head);
      349   >>          if (cluster_next(&list->tail) == idx) {
      350   >>                  cluster_set_null(&list->head);
      351   >>                  cluster_set_null(&list->tail);
      352   >>          } else
      353   >>                  cluster_set_next_flag(&list->head,
      354   >>                                        cluster_next(&ci[idx]), 0);
      355   >>  
      356   >>          return idx;
      357   >>  }
      358   >>  
 448  359       /* Add a cluster to discard list and schedule it to do discard */
 449  360       static void swap_cluster_schedule_discard(struct swap_info_struct *si,
 450        !!                  struct swap_cluster_info *ci)
      361   !!                  unsigned int idx)
 451  362       {
 452        <<          unsigned int idx = cluster_index(si, ci);
 453  363               /*
 454        !!           * If scan_swap_map_slots() can't find a free cluster, it will check
      364   !!           * If scan_swap_map() can't find a free cluster, it will check
 455  365                * si->swap_map directly. To make sure the discarding cluster isn't
 456        !!           * taken by scan_swap_map_slots(), mark the swap entries bad (occupied).
 457        !!           * It will be cleared after discard
      366   !!           * taken by scan_swap_map(), mark the swap entries bad (occupied). It
      367   !!           * will be cleared after discard
 458  368                */
 459  369               memset(si->swap_map + idx * SWAPFILE_CLUSTER,
 460  370                               SWAP_MAP_BAD, SWAPFILE_CLUSTER);
 461  371       
 462        <<          VM_BUG_ON(ci->flags & CLUSTER_FLAG_FREE);
 463        !!          list_move_tail(&ci->list, &si->discard_clusters);
      372   !!          cluster_list_add_tail(&si->discard_clusters, si->cluster_info, idx);
 464        <<          ci->flags = 0;
      373   >>  
 465  374               schedule_work(&si->discard_work);
 466  375       }
 467        <<  
 468        <<  static void __free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci)
 469        <<  {
 470        <<          lockdep_assert_held(&si->lock);
 471        <<          lockdep_assert_held(&ci->lock);
 472        <<  
 473        <<          if (ci->flags)
 474        <<                  list_move_tail(&ci->list, &si->free_clusters);
 475        <<          else
 476        <<                  list_add_tail(&ci->list, &si->free_clusters);
 477        <<          ci->flags = CLUSTER_FLAG_FREE;
 478        <<          ci->order = 0;
 479        <<  }
 480  376       
 481  377       /*
 482  378        * Doing discard actually. After a cluster discard is finished, the cluster
 483  379        * will be added to free cluster list. caller should hold si->lock.
 484  380       */
 485  381       static void swap_do_scheduled_discard(struct swap_info_struct *si)
 486  382       {
 487        !!          struct swap_cluster_info *ci;
      383   !!          struct swap_cluster_info *info, *ci;
 488  384               unsigned int idx;
 489  385       
      386   >>          info = si->cluster_info;
      387   >>  
 490        !!          while (!list_empty(&si->discard_clusters)) {
 491        !!                  ci = list_first_entry(&si->discard_clusters, struct swap_cluster_info, list);
 492        !!                  list_del(&ci->list);
 493        !!                  idx = cluster_index(si, ci);
      388   !!          while (!cluster_list_empty(&si->discard_clusters)) {
      389   !!                  idx = cluster_list_del_first(&si->discard_clusters, info);
 494  390                       spin_unlock(&si->lock);
 495  391       
 496  392                       discard_swap_cluster(si, idx * SWAPFILE_CLUSTER,
 497  393                                       SWAPFILE_CLUSTER);
 498  394       
 499  395                       spin_lock(&si->lock);
 500        !!                  spin_lock(&ci->lock);
 501        !!                  __free_cluster(si, ci);
      396   !!                  ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
      397   !!                  cluster_set_flag(ci, CLUSTER_FLAG_FREE);
      398   !!                  unlock_cluster(ci);
      399   !!                  cluster_list_add_tail(&si->free_clusters, info, idx);
      400   !!                  ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
 502  401                       memset(si->swap_map + idx * SWAPFILE_CLUSTER,
 503  402                                       0, SWAPFILE_CLUSTER);
 504        !!                  spin_unlock(&ci->lock);
      403   !!                  unlock_cluster(ci);
 505  404               }
 506  405       }
 507  406       
 508  407       static void swap_discard_work(struct work_struct *work)
 509  408       {
 510  409               struct swap_info_struct *si;
 511  410       
 512  411               si = container_of(work, struct swap_info_struct, discard_work);
 513  412       
 514  413               spin_lock(&si->lock);
 515  414               swap_do_scheduled_discard(si);
 516  415               spin_unlock(&si->lock);
 517  416       }
 518  417       
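swap_discard_work() recovers its swap_info_struct from the embedded work_struct with container_of(), i.e. by subtracting the member's offset from the member pointer. A freestanding model of that macro:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct work { int pending; };
    struct info { int id; struct work discard_work; };

    int main(void)
    {
            struct info si = { .id = 7 };
            struct work *w = &si.discard_work;      /* what the workqueue hands back */
            printf("%d\n", container_of(w, struct info, discard_work)->id); /* 7 */
            return 0;
    }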
 519        <<  static void swap_users_ref_free(struct percpu_ref *ref)
 520        <<  {
 521        <<          struct swap_info_struct *si;
 522        <<  
 523        <<          si = container_of(ref, struct swap_info_struct, users);
 524        <<          complete(&si->comp);
 525        <<  }
 526        <<  
 527        <<  static void free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci)
 528        <<  {
 529        <<          VM_BUG_ON(ci->count != 0);
 530        <<          lockdep_assert_held(&si->lock);
 531        <<          lockdep_assert_held(&ci->lock);
 532        <<  
 533        <<          if (ci->flags & CLUSTER_FLAG_FRAG)
 534        <<                  si->frag_cluster_nr[ci->order]--;
 535        <<  
 536        <<          /*
 537        <<           * If the swap is discardable, prepare discard the cluster
 538        <<           * instead of free it immediately. The cluster will be freed
 539        <<           * after discard.
 540        <<           */
 541        <<          if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==
 542        <<              (SWP_WRITEOK | SWP_PAGE_DISCARD)) {
 543        <<                  swap_cluster_schedule_discard(si, ci);
 544        <<                  return;
 545        <<          }
 546        <<  
 547        <<          __free_cluster(si, ci);
 548        <<  }
 549        <<  
 550  418       /*
 551        !!   * The cluster corresponding to page_nr will be used. The cluster will not be
 552        !!   * added to free cluster list and its usage counter will be increased by 1.
 553        <<   * Only used for initialization.
      419   !!   * The cluster corresponding to page_nr will be used. The cluster will be
      420   !!   * removed from free cluster list and its usage counter will be increased.
 554  421        */
 555        !!  static void inc_cluster_info_page(struct swap_info_struct *si,
      422   !!  static void inc_cluster_info_page(struct swap_info_struct *p,
 556  423               struct swap_cluster_info *cluster_info, unsigned long page_nr)
 557  424       {
 558  425               unsigned long idx = page_nr / SWAPFILE_CLUSTER;
 559        <<          struct swap_cluster_info *ci;
 560  426       
 561  427               if (!cluster_info)
 562  428                       return;
      429   >>          if (cluster_is_free(&cluster_info[idx])) {
      430   >>                  VM_BUG_ON(cluster_list_first(&p->free_clusters) != idx);
      431   >>                  cluster_list_del_first(&p->free_clusters, cluster_info);
      432   >>                  cluster_set_count_flag(&cluster_info[idx], 0, 0);
      433   >>          }
 563  434       
 564        !!          ci = cluster_info + idx;
 565        !!          ci->count++;
 566        !!  
 567        !!          VM_BUG_ON(ci->count > SWAPFILE_CLUSTER);
 568        !!          VM_BUG_ON(ci->flags);
      435   !!          VM_BUG_ON(cluster_count(&cluster_info[idx]) >= SWAPFILE_CLUSTER);
      436   !!          cluster_set_count(&cluster_info[idx],
      437   !!                  cluster_count(&cluster_info[idx]) + 1);
 569  438       }
 570  439       
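inc_cluster_info_page() and dec_cluster_info_page() (below) keep a per-cluster count of in-use slots; a cluster becomes a candidate for the free or discard lists exactly when that count returns to zero. A minimal counter model (inc_page/dec_pages are invented names):

    #include <assert.h>

    #define SWAPFILE_CLUSTER 256

    struct cluster { unsigned int count; };

    static void inc_page(struct cluster *ci)
    {
            ci->count++;
            assert(ci->count <= SWAPFILE_CLUSTER);  /* can't exceed slots per cluster */
    }

    static int dec_pages(struct cluster *ci, unsigned int nr)
    {
            assert(ci->count >= nr);
            ci->count -= nr;
            return ci->count == 0;  /* 1: cluster now free (or discardable) */
    }

    int main(void)
    {
            struct cluster ci = { 0 };
            inc_page(&ci);
            inc_page(&ci);
            return dec_pages(&ci, 2) ? 0 : 1;       /* frees at zero -> exit 0 */
    }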
 571  440       /*
 572        !!   * The cluster ci decreases @nr_pages usage. If the usage counter becomes 0,
 573        !!   * which means no page in the cluster is in use, we can optionally discard
 574        !!   * the cluster and add it to free cluster list.
      441   !!   * The cluster corresponding to page_nr decreases one usage. If the usage
      442   !!   * counter becomes 0, which means no page in the cluster is in using, we can
      443   !!   * optionally discard the cluster and add it to free cluster list.
 575  444        */
 576        !!  static void dec_cluster_info_page(struct swap_info_struct *si,
 577        !!                                    struct swap_cluster_info *ci, int nr_pages)
      445   !!  static void dec_cluster_info_page(struct swap_info_struct *p,
      446   !!          struct swap_cluster_info *cluster_info, unsigned long page_nr)
 578  447       {
 579        !!          if (!si->cluster_info)
 580        !!                  return;
      448   !!          unsigned long idx = page_nr / SWAPFILE_CLUSTER;
      449   !!  
      450   !!          if (!cluster_info)
      451   !!                  return;
 581        <<  
 582        <<          VM_BUG_ON(ci->count < nr_pages);
 583        <<          VM_BUG_ON(cluster_is_free(ci));
 584        <<          lockdep_assert_held(&si->lock);
 585        <<          lockdep_assert_held(&ci->lock);
 586        <<          ci->count -= nr_pages;
 587        <<  
 588        <<          if (!ci->count) {
 589        <<                  free_cluster(si, ci);
 590        <<                  return;
 591        <<          }
 592        <<  
 593        <<          if (!(ci->flags & CLUSTER_FLAG_NONFULL)) {
 594        <<                  VM_BUG_ON(ci->flags & CLUSTER_FLAG_FREE);
 595        <<                  if (ci->flags & CLUSTER_FLAG_FRAG)
 596        <<                          si->frag_cluster_nr[ci->order]--;
 597        <<                  list_move_tail(&ci->list, &si->nonfull_clusters[ci->order]);
 598        <<                  ci->flags = CLUSTER_FLAG_NONFULL;
 599        <<          }
 600        <<  }
 601        <<  
602 static bool cluster_reclaim_range(struct swap_ << 
603                                   struct swap_ << 
604                                   unsigned lon << 
605 {                                              << 
606         unsigned char *map = si->swap_map;     << 
607         unsigned long offset;                  << 
608                                                << 
609         spin_unlock(&ci->lock);                << 
610         spin_unlock(&si->lock);                << 
611                                                << 
612         for (offset = start; offset < end; off << 
613                 switch (READ_ONCE(map[offset]) << 
614                 case 0:                        << 
615                         continue;              << 
616                 case SWAP_HAS_CACHE:           << 
617                         if (__try_to_reclaim_s << 
618                                 continue;      << 
619                         goto out;              << 
620                 default:                       << 
621                         goto out;              << 
622                 }                              << 
623         }                                      << 
624 out:                                           << 
625         spin_lock(&si->lock);                  << 
626         spin_lock(&ci->lock);                  << 
627                                                   452 
628         /*                                     !! 453         VM_BUG_ON(cluster_count(&cluster_info[idx]) == 0);
629          * Recheck the range no matter whether reclaim succeeded or not; the !! 454         cluster_set_count(&cluster_info[idx],
630          * slot could have been freed while we were not holding the lock.    !! 455                 cluster_count(&cluster_info[idx]) - 1);
631          */                                    !! 456 
632         for (offset = start; offset < end; off !! 457         if (cluster_count(&cluster_info[idx]) == 0) {
633                 if (READ_ONCE(map[offset]))    !! 458                 /*
634                         return false;          !! 459                  * If the swap is discardable, prepare discard the cluster
635                                                !! 460                  * instead of free it immediately. The cluster will be freed
636         return true;                           !! 461                  * after discard.
637 }                                              !! 462                  */
638                                                !! 463                 if ((p->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==
639 static bool cluster_scan_range(struct swap_inf !! 464                                  (SWP_WRITEOK | SWP_PAGE_DISCARD)) {
640                                struct swap_clu !! 465                         swap_cluster_schedule_discard(p, idx);
641                                unsigned long s !! 466                         return;
642 {                                              << 
643         unsigned long offset, end = start + nr << 
644         unsigned char *map = si->swap_map;     << 
645         bool need_reclaim = false;             << 
646                                                << 
647         for (offset = start; offset < end; off << 
648                 switch (READ_ONCE(map[offset]) << 
649                 case 0:                        << 
650                         continue;              << 
651                 case SWAP_HAS_CACHE:           << 
652                         if (!vm_swap_full())   << 
653                                 return false;  << 
654                         need_reclaim = true;   << 
655                         continue;              << 
656                 default:                       << 
657                         return false;          << 
658                 }                              << 
659         }                                      << 
660                                                << 
661         if (need_reclaim)                      << 
662                 return cluster_reclaim_range(s << 
663                                                << 
664         return true;                           << 
665 }                                              << 
666                                                << 
667 static void cluster_alloc_range(struct swap_in << 
668                                 unsigned int s << 
669                                 unsigned int o << 
670 {                                              << 
671         unsigned int nr_pages = 1 << order;    << 
672                                                << 
673         if (cluster_is_free(ci)) {             << 
674                 if (nr_pages < SWAPFILE_CLUSTE << 
675                         list_move_tail(&ci->li << 
676                         ci->flags = CLUSTER_FL << 
677                 }                                 467                 }
678                 ci->order = order;             << 
679         }                                      << 
680                                                << 
681         memset(si->swap_map + start, usage, nr << 
682         swap_range_alloc(si, start, nr_pages); << 
683         ci->count += nr_pages;                 << 
684                                                   468 
685         if (ci->count == SWAPFILE_CLUSTER) {   !! 469                 cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
686                 VM_BUG_ON(!(ci->flags &        !! 470                 cluster_list_add_tail(&p->free_clusters, cluster_info, idx);
687                           (CLUSTER_FLAG_FREE | << 
688                 if (ci->flags & CLUSTER_FLAG_F << 
689                         si->frag_cluster_nr[ci << 
690                 list_move_tail(&ci->list, &si- << 
691                 ci->flags = CLUSTER_FLAG_FULL; << 
692         }                                      << 
693 }                                              << 
694                                                << 
695 static unsigned int alloc_swap_scan_cluster(st << 
696                                             un << 
697                                             un << 
698 {                                              << 
699         unsigned long start = offset & ~(SWAPF << 
700         unsigned long end = min(start + SWAPFI << 
701         unsigned int nr_pages = 1 << order;    << 
702         struct swap_cluster_info *ci;          << 
703                                                << 
704         if (end < nr_pages)                    << 
705                 return SWAP_NEXT_INVALID;      << 
706         end -= nr_pages;                       << 
707                                                << 
708         ci = lock_cluster(si, offset);         << 
709         if (ci->count + nr_pages > SWAPFILE_CL << 
710                 offset = SWAP_NEXT_INVALID;    << 
711                 goto done;                     << 
712         }                                      << 
713                                                << 
714         while (offset <= end) {                << 
715                 if (cluster_scan_range(si, ci, << 
716                         cluster_alloc_range(si << 
717                         *foundp = offset;      << 
718                         if (ci->count == SWAPF << 
719                                 offset = SWAP_ << 
720                                 goto done;     << 
721                         }                      << 
722                         offset += nr_pages;    << 
723                         break;                 << 
724                 }                              << 
725                 offset += nr_pages;            << 
726         }                                         471         }
727         if (offset > end)                      << 
728                 offset = SWAP_NEXT_INVALID;    << 
729 done:                                          << 
730         unlock_cluster(ci);                    << 
731         return offset;                         << 
732 }                                                 472 }
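
alloc_swap_scan_cluster() above steps through one cluster in windows of nr_pages (1 << order) slots and accepts a window only when every slot in it is free or reclaimable swap cache. Ignoring locking and the reclaim path, the scan reduces to a small loop; a standalone sketch with invented names, covering only the all-free fast path:

/* toy_scan.c - sketch of scanning one cluster for an order-sized free run.
 * Only the all-free fast path of cluster_scan_range() is modeled; the
 * SWAP_HAS_CACHE reclaim path and all locking are omitted.
 */
#include <stdio.h>
#include <string.h>

#define TOY_CLUSTER_SIZE 512

/* Return the first offset of a fully free, naturally aligned run of
 * (1 << order) slots within the cluster, or -1 if none exists. */
static int toy_scan_cluster(const unsigned char *map, int order)
{
        int nr = 1 << order;

        for (int off = 0; off + nr <= TOY_CLUSTER_SIZE; off += nr) {
                int busy = 0;

                for (int i = 0; i < nr; i++) {
                        if (map[off + i]) {
                                busy = 1;
                                break;
                        }
                }
                if (!busy)
                        return off;
        }
        return -1;
}

int main(void)
{
        unsigned char map[TOY_CLUSTER_SIZE];

        memset(map, 0, sizeof(map));
        map[0] = 1;                     /* slot 0 busy: the run moves up */
        printf("order-2 run at %d\n", toy_scan_cluster(map, 2)); /* -> 4 */
        return 0;
}

Because the window advances by nr each time, any run it accepts is naturally aligned, which is what large (order > 0) swap entries require.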
733                                                   473 
734 /* Walk the full-cluster list and reclaim swap-cache-only slots */ !! 474 /*
735 static void swap_reclaim_full_clusters(struct  !! 475  * It's possible that scan_swap_map() uses a free cluster in the middle of
                                                   >> 476  * the free cluster list. Avoid such abuse to prevent list corruption.
                                                   >> 477  */
                                                   >> 478 static bool
                                                   >> 479 scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
                                                   >> 480         unsigned long offset)
736 {                                                 481 {
737         long to_scan = 1;                      !! 482         struct percpu_cluster *percpu_cluster;
738         unsigned long offset, end;             !! 483         bool conflict;
739         struct swap_cluster_info *ci;          << 
740         unsigned char *map = si->swap_map;     << 
741         int nr_reclaim;                        << 
742                                                << 
743         if (force)                             << 
744                 to_scan = si->inuse_pages / SW << 
745                                                << 
746         while (!list_empty(&si->full_clusters) << 
747                 ci = list_first_entry(&si->ful << 
748                 list_move_tail(&ci->list, &si- << 
749                 offset = cluster_offset(si, ci << 
750                 end = min(si->max, offset + SW << 
751                 to_scan--;                     << 
752                                                << 
753                 spin_unlock(&si->lock);        << 
754                 while (offset < end) {         << 
755                         if (READ_ONCE(map[offs << 
756                                 nr_reclaim = _ << 
757                                                << 
758                                 if (nr_reclaim << 
759                                         offset << 
760                                         contin << 
761                                 }              << 
762                         }                      << 
763                         offset++;              << 
764                 }                              << 
765                 spin_lock(&si->lock);          << 
766                                                << 
767                 if (to_scan <= 0)              << 
768                         break;                 << 
769         }                                      << 
770 }                                              << 
771                                                   484 
772 static void swap_reclaim_work(struct work_stru !! 485         offset /= SWAPFILE_CLUSTER;
773 {                                              !! 486         conflict = !cluster_list_empty(&si->free_clusters) &&
774         struct swap_info_struct *si;           !! 487                 offset != cluster_list_first(&si->free_clusters) &&
                                                   >> 488                 cluster_is_free(&si->cluster_info[offset]);
775                                                   489 
776         si = container_of(work, struct swap_in !! 490         if (!conflict)
                                                   >> 491                 return false;
777                                                   492 
778         spin_lock(&si->lock);                  !! 493         percpu_cluster = this_cpu_ptr(si->percpu_cluster);
779         swap_reclaim_full_clusters(si, true);  !! 494         cluster_set_null(&percpu_cluster->index);
780         spin_unlock(&si->lock);                !! 495         return true;
781 }                                                 496 }
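
swap_reclaim_full_clusters() takes the head of the full-cluster list, rotates it to the tail so that repeated calls make progress, and scans its slots for reclaimable swap cache, stopping after to_scan clusters unless forced (the work item above runs the forced variant). The rotation pattern, reduced to a standalone sketch with an array standing in for the kernel's linked list and invented names:

/* toy_reclaim.c - sketch of the bounded full-cluster reclaim walk.
 * An array acts as the cluster list; all names are invented.
 */
#include <stdio.h>

#define NCLUSTERS 4

int main(void)
{
        int full[NCLUSTERS] = { 3, 1, 4, 2 };   /* cluster indexes, head first */
        int to_scan = 2;                        /* bounded, as in the non-forced case */

        while (to_scan-- > 0) {
                int ci = full[0];
                int i;

                /* rotate head to tail so the next call starts elsewhere */
                for (i = 0; i < NCLUSTERS - 1; i++)
                        full[i] = full[i + 1];
                full[NCLUSTERS - 1] = ci;

                printf("scanning cluster %d for reclaimable cache slots\n", ci);
        }
        return 0;
}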
782                                                   497 
783 /*                                                498 /*
784  * Try to get swap entries with the specified order from the current CPU's !! 499  * Try to get a swap entry from the current CPU's swap entry pool (a cluster). This
785  * swap entry pool (a cluster). This might involve allocating a new cluster !! 500  * might involve allocating a new cluster for the current CPU too.
786  * for the current CPU too.                    << 
787  */                                               501  */
788 static unsigned long cluster_alloc_swap_entry( !! 502 static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
789                                                !! 503         unsigned long *offset, unsigned long *scan_base)
790 {                                                 504 {
791         struct percpu_cluster *cluster;           505         struct percpu_cluster *cluster;
792         struct swap_cluster_info *ci;             506         struct swap_cluster_info *ci;
793         unsigned int offset, found = 0;        !! 507         bool found_free;
                                                   >> 508         unsigned long tmp, max;
794                                                   509 
795 new_cluster:                                      510 new_cluster:
796         lockdep_assert_held(&si->lock);        << 
797         cluster = this_cpu_ptr(si->percpu_clus    511         cluster = this_cpu_ptr(si->percpu_cluster);
798         offset = cluster->next[order];         !! 512         if (cluster_is_null(&cluster->index)) {
799         if (offset) {                          !! 513                 if (!cluster_list_empty(&si->free_clusters)) {
800                 offset = alloc_swap_scan_clust !! 514                         cluster->index = si->free_clusters.head;
801                 if (found)                     !! 515                         cluster->next = cluster_next(&cluster->index) *
802                         goto done;             !! 516                                         SWAPFILE_CLUSTER;
803         }                                      !! 517                 } else if (!cluster_list_empty(&si->discard_clusters)) {
804                                                << 
805         if (!list_empty(&si->free_clusters)) { << 
806                 ci = list_first_entry(&si->fre << 
807                 offset = alloc_swap_scan_clust << 
808                 VM_BUG_ON(!found);             << 
809                 goto done;                     << 
810         }                                      << 
811                                                << 
812         /* Try reclaim from full clusters if f << 
813         if (vm_swap_full())                    << 
814                 swap_reclaim_full_clusters(si, << 
815                                                << 
816         if (order < PMD_ORDER) {               << 
817                 unsigned int frags = 0;        << 
818                                                << 
819                 while (!list_empty(&si->nonful << 
820                         ci = list_first_entry( << 
821                                                << 
822                         list_move_tail(&ci->li << 
823                         ci->flags = CLUSTER_FL << 
824                         si->frag_cluster_nr[or << 
825                         offset = alloc_swap_sc << 
826                                                << 
827                         frags++;               << 
828                         if (found)             << 
829                                 break;         << 
830                 }                              << 
831                                                << 
832                 if (!found) {                  << 
833                         /*                        518                         /*
834                          * Nonfull clusters are moved to the frag tail if we reached !! 519                          * we don't have a free cluster, but some clusters are being
835                          * here; count them too and don't over-scan the frag list.   !! 520                          * discarded; do the discard now and reclaim them
836                          */                       521                          */
837                         while (frags < si->fra !! 522                         swap_do_scheduled_discard(si);
838                                 ci = list_firs !! 523                         *scan_base = *offset = si->cluster_next;
839                                                !! 524                         goto new_cluster;
840                                 /*             !! 525                 } else
841                                  * Rotate the frag list to iterate; these clusters were all << 
842                                  * failing high order allocation or were moved here due to  << 
843                                  * per-CPU usage; this helps keep usable clusters ahead.    << 
844                                  */            << 
845                                 list_move_tail << 
846                                 offset = alloc << 
847                                                << 
848                                 frags++;       << 
849                                 if (found)     << 
850                                         break; << 
851                         }                      << 
852                 }                              << 
853         }                                      << 
854                                                << 
855         if (found)                             << 
856                 goto done;                     << 
857                                                << 
858         if (!list_empty(&si->discard_clusters) << 
859                 /*                             << 
860                  * we don't have a free cluster, but some clusters are    << 
861                  * being discarded; do the discard now and reclaim them,  << 
862                  * then reread cluster_next_cpu since we dropped si->lock << 
863                  */                            << 
864                 swap_do_scheduled_discard(si); << 
865                 goto new_cluster;              << 
866         }                                      << 
867                                                << 
868         if (order)                             << 
869                 goto done;                     << 
870                                                << 
871         /* Order 0 stealing from higher order  << 
872         for (int o = 1; o < SWAP_NR_ORDERS; o+ << 
873                 /*                             << 
874                  * Clusters here have at least one usable slot and can't fail order 0    << 
875                  * allocation, but reclaim may drop si->lock and race with another user. << 
876                  */                            << 
877                 while (!list_empty(&si->frag_c << 
878                         ci = list_first_entry( << 
879                                                << 
880                         offset = alloc_swap_sc << 
881                                                << 
882                         if (found)             << 
883                                 goto done;     << 
884                 }                              << 
885                                                << 
886                 while (!list_empty(&si->nonful << 
887                         ci = list_first_entry( << 
888                                                << 
889                         offset = alloc_swap_sc << 
890                                                << 
891                         if (found)             << 
892                                 goto done;     << 
893                 }                              << 
894         }                                      << 
895                                                << 
896 done:                                          << 
897         cluster->next[order] = offset;         << 
898         return found;                          << 
899 }                                              << 
900                                                << 
901 static void __del_from_avail_list(struct swap_ << 
902 {                                              << 
903         int nid;                               << 
904                                                << 
905         assert_spin_locked(&si->lock);         << 
906         for_each_node(nid)                     << 
907                 plist_del(&si->avail_lists[nid << 
908 }                                              << 
909                                                << 
910 static void del_from_avail_list(struct swap_in << 
911 {                                              << 
912         spin_lock(&swap_avail_lock);           << 
913         __del_from_avail_list(si);             << 
914         spin_unlock(&swap_avail_lock);         << 
915 }                                              << 
916                                                << 
917 static void swap_range_alloc(struct swap_info_ << 
918                              unsigned int nr_e << 
919 {                                              << 
920         unsigned int end = offset + nr_entries << 
921                                                << 
922         if (offset == si->lowest_bit)          << 
923                 si->lowest_bit += nr_entries;  << 
924         if (end == si->highest_bit)            << 
925                 WRITE_ONCE(si->highest_bit, si << 
926         WRITE_ONCE(si->inuse_pages, si->inuse_ << 
927         if (si->inuse_pages == si->pages) {    << 
928                 si->lowest_bit = si->max;      << 
929                 si->highest_bit = 0;           << 
930                 del_from_avail_list(si);       << 
931                                                << 
932                 if (vm_swap_full())            << 
933                         schedule_work(&si->rec << 
934         }                                         527         }
935 }                                              << 
936                                                << 
937 static void add_to_avail_list(struct swap_info << 
938 {                                              << 
939         int nid;                               << 
940                                                << 
941         spin_lock(&swap_avail_lock);           << 
942         for_each_node(nid)                     << 
943                 plist_add(&si->avail_lists[nid << 
944         spin_unlock(&swap_avail_lock);         << 
945 }                                              << 
946                                                << 
947 static void swap_range_free(struct swap_info_s << 
948                             unsigned int nr_en << 
949 {                                              << 
950         unsigned long begin = offset;          << 
951         unsigned long end = offset + nr_entrie << 
952         void (*swap_slot_free_notify)(struct b << 
953         unsigned int i;                        << 
954                                                << 
955         /*                                     << 
956          * Use atomic clear_bit operations only on zeromap instead of non-atomic      << 
957          * bitmap_clear, to prevent adjacent-bit corruption from simultaneous writes. << 
958          */                                    << 
959         for (i = 0; i < nr_entries; i++)       << 
960                 clear_bit(offset + i, si->zero << 
961                                                   528 
962         if (offset < si->lowest_bit)           !! 529         found_free = false;
963                 si->lowest_bit = offset;       << 
964         if (end > si->highest_bit) {           << 
965                 bool was_full = !si->highest_b << 
966                                                << 
967                 WRITE_ONCE(si->highest_bit, en << 
968                 if (was_full && (si->flags & S << 
969                         add_to_avail_list(si); << 
970         }                                      << 
971         if (si->flags & SWP_BLKDEV)            << 
972                 swap_slot_free_notify =        << 
973                         si->bdev->bd_disk->fop << 
974         else                                   << 
975                 swap_slot_free_notify = NULL;  << 
976         while (offset <= end) {                << 
977                 arch_swap_invalidate_page(si-> << 
978                 if (swap_slot_free_notify)     << 
979                         swap_slot_free_notify( << 
980                 offset++;                      << 
981         }                                      << 
982         clear_shadow_from_swap_cache(si->type, << 
983                                                   530 
984         /*                                        531         /*
985          * Make sure that try_to_unuse() observes si->inuse_pages reaching 0 !! 532          * Other CPUs can use our cluster if they can't find a free cluster;
986          * only after the above cleanups are done.                           !! 533          * check if there is still a free entry in the cluster
987          */                                       534          */
988         smp_wmb();                             !! 535         tmp = cluster->next;
989         atomic_long_add(nr_entries, &nr_swap_p !! 536         max = min_t(unsigned long, si->max,
990         WRITE_ONCE(si->inuse_pages, si->inuse_ !! 537                     (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER);
991 }                                              !! 538         if (tmp >= max) {
992                                                !! 539                 cluster_set_null(&cluster->index);
993 static void set_cluster_next(struct swap_info_ !! 540                 goto new_cluster;
994 {                                              << 
995         unsigned long prev;                    << 
996                                                << 
997         if (!(si->flags & SWP_SOLIDSTATE)) {   << 
998                 si->cluster_next = next;       << 
999                 return;                        << 
1000         }                                     << 
1001                                               << 
1002         prev = this_cpu_read(*si->cluster_nex << 
1003         /*                                    << 
1004          * When crossing a swap-address-space-sized aligned trunk, choose << 
1005          * another trunk randomly to avoid lock contention on the swap    << 
1006          * address space if possible.                                     << 
1007          */                                   << 
1008         if ((prev >> SWAP_ADDRESS_SPACE_SHIFT << 
1009             (next >> SWAP_ADDRESS_SPACE_SHIFT << 
1010                 /* No free swap slots availab << 
1011                 if (si->highest_bit <= si->lo << 
1012                         return;               << 
1013                 next = get_random_u32_inclusi << 
1014                 next = ALIGN_DOWN(next, SWAP_ << 
1015                 next = max_t(unsigned int, ne << 
1016         }                                     << 
1017         this_cpu_write(*si->cluster_next_cpu, << 
1018 }                                             << 
1019                                               << 
1020 static bool swap_offset_available_and_locked( << 
1021                                               << 
1022 {                                             << 
1023         if (data_race(!si->swap_map[offset])) << 
1024                 spin_lock(&si->lock);         << 
1025                 return true;                  << 
1026         }                                     << 
1027                                               << 
1028         if (vm_swap_full() && READ_ONCE(si->s << 
1029                 spin_lock(&si->lock);         << 
1030                 return true;                  << 
1031         }                                        541         }
1032                                               !! 542         ci = lock_cluster(si, tmp);
1033         return false;                         !! 543         while (tmp < max) {
1034 }                                             !! 544                 if (!si->swap_map[tmp]) {
1035                                               !! 545                         found_free = true;
1036 static int cluster_alloc_swap(struct swap_inf << 
1037                              unsigned char us << 
1038                              swp_entry_t slot << 
1039 {                                             << 
1040         int n_ret = 0;                        << 
1041                                               << 
1042         VM_BUG_ON(!si->cluster_info);         << 
1043                                               << 
1044         while (n_ret < nr) {                  << 
1045                 unsigned long offset = cluste << 
1046                                               << 
1047                 if (!offset)                  << 
1048                         break;                   546                         break;
1049                 slots[n_ret++] = swp_entry(si !! 547                 }
                                                   >> 548                 tmp++;
1050         }                                        549         }
1051                                               !! 550         unlock_cluster(ci);
1052         return n_ret;                         !! 551         if (!found_free) {
                                                   >> 552                 cluster_set_null(&cluster->index);
                                                   >> 553                 goto new_cluster;
                                                   >> 554         }
                                                   >> 555         cluster->next = tmp + 1;
                                                   >> 556         *offset = tmp;
                                                   >> 557         *scan_base = tmp;
                                                   >> 558         return found_free;
1053 }                                                559 }
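
cluster_alloc_swap_entry() above tries its allocation sources in a fixed order: the per-CPU next[order] hint, the free list, then (for orders below PMD_ORDER) the nonfull and frag lists, then scheduled discards, and finally order-0 stealing from higher-order clusters. The hint itself is just a remembered offset per order, with 0 (SWAP_NEXT_INVALID) meaning no hint. A single-threaded sketch of that hint mechanism, using invented names and a flat slot map in place of the per-CPU structures:

/* toy_hint.c - sketch of the per-order "next offset" allocation hint.
 * Single-threaded stand-in for the per-CPU cluster hints; invented names.
 */
#include <stdio.h>

#define TOY_ORDERS 4
#define TOY_SLOTS  64
#define TOY_NEXT_INVALID 0      /* offset 0 doubles as "no hint" */

static unsigned char map[TOY_SLOTS];
static unsigned int next_hint[TOY_ORDERS];

static unsigned int toy_alloc(int order)
{
        unsigned int nr = 1u << order;
        unsigned int off = next_hint[order] ? next_hint[order] : 1;

        for (; off + nr <= TOY_SLOTS; off += nr) {
                unsigned int i;

                for (i = 0; i < nr && !map[off + i]; i++)
                        ;
                if (i == nr) {
                        for (i = 0; i < nr; i++)
                                map[off + i] = 1;
                        next_hint[order] = off + nr;    /* resume here next time */
                        return off;
                }
        }
        next_hint[order] = TOY_NEXT_INVALID;            /* caller falls back to lists */
        return 0;
}

int main(void)
{
        printf("first order-1 run at %u\n", toy_alloc(1));
        printf("next  order-1 run at %u\n", toy_alloc(1)); /* continues from the hint */
        return 0;
}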
1054                                                  560 
1055 static int scan_swap_map_slots(struct swap_in    561 static int scan_swap_map_slots(struct swap_info_struct *si,
1056                                unsigned char     562                                unsigned char usage, int nr,
1057                                swp_entry_t sl !! 563                                swp_entry_t slots[])
1058 {                                                564 {
                                                   >> 565         struct swap_cluster_info *ci;
1059         unsigned long offset;                    566         unsigned long offset;
1060         unsigned long scan_base;                 567         unsigned long scan_base;
1061         unsigned long last_in_cluster = 0;       568         unsigned long last_in_cluster = 0;
1062         int latency_ration = LATENCY_LIMIT;      569         int latency_ration = LATENCY_LIMIT;
1063         unsigned int nr_pages = 1 << order;   << 
1064         int n_ret = 0;                           570         int n_ret = 0;
1065         bool scanned_many = false;            !! 571 
                                                   >> 572         if (nr > SWAP_BATCH)
                                                   >> 573                 nr = SWAP_BATCH;
1066                                                  574 
1067         /*                                       575         /*
1068          * We try to cluster swap pages by al    576          * We try to cluster swap pages by allocating them sequentially
1069          * in swap.  Once we've allocated SWA    577          * in swap.  Once we've allocated SWAPFILE_CLUSTER pages this
1070          * way, however, we resort to first-f    578          * way, however, we resort to first-free allocation, starting
1071          * a new cluster.  This prevents us f    579          * a new cluster.  This prevents us from scattering swap pages
1072          * all over the entire swap partition    580          * all over the entire swap partition, so that we reduce
1073          * overall disk seek times between sw    581          * overall disk seek times between swap pages.  -- sct
1074          * But we do now try to find an empty    582          * But we do now try to find an empty cluster.  -Andrea
1075          * And we let swap pages go all over     583          * And we let swap pages go all over an SSD partition.  Hugh
1076          */                                      584          */
1077                                                  585 
1078         if (order > 0) {                      << 
1079                 /*                            << 
1080                  * Should not even be attempting large allocations when huge << 
1081                  * page swap is disabled.  Warn and fail the allocation.     << 
1082                  */                           << 
1083                 if (!IS_ENABLED(CONFIG_THP_SW << 
1084                     nr_pages > SWAPFILE_CLUST << 
1085                         VM_WARN_ON_ONCE(1);   << 
1086                         return 0;             << 
1087                 }                             << 
1088                                               << 
1089                 /*                            << 
1090                  * Swapfile is not a block device or not using clusters, so it is << 
1091                  * unable to allocate large entries.                              << 
1092                  */                           << 
1093                 if (!(si->flags & SWP_BLKDEV) << 
1094                         return 0;             << 
1095         }                                     << 
1096                                               << 
1097         if (si->cluster_info)                 << 
1098                 return cluster_alloc_swap(si, << 
1099                                               << 
1100         si->flags += SWP_SCANNING;               586         si->flags += SWP_SCANNING;
                                                   >> 587         scan_base = offset = si->cluster_next;
1101                                                  588 
1102         /* For HDD, sequential access is more important. */ !! 589         /* SSD algorithm */
1103         scan_base = si->cluster_next;         !! 590         if (si->cluster_info) {
1104         offset = scan_base;                   !! 591                 if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
                                                   >> 592                         goto checks;
                                                   >> 593                 else
                                                   >> 594                         goto scan;
                                                   >> 595         }
1105                                                  596 
1106         if (unlikely(!si->cluster_nr--)) {       597         if (unlikely(!si->cluster_nr--)) {
1107                 if (si->pages - si->inuse_pag    598                 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
1108                         si->cluster_nr = SWAP    599                         si->cluster_nr = SWAPFILE_CLUSTER - 1;
1109                         goto checks;             600                         goto checks;
1110                 }                                601                 }
1111                                                  602 
1112                 spin_unlock(&si->lock);          603                 spin_unlock(&si->lock);
1113                                                  604 
1114                 /*                               605                 /*
1115                  * If seek is expensive, star    606                  * If seek is expensive, start searching for a new cluster from the
1116                  * start of partition, to min    607                  * start of the partition, to minimize the span of allocated swap.
                                                   >> 608                  * If seek is cheap, that is the SWP_SOLIDSTATE si->cluster_info
                                                   >> 609                  * case, just handled by scan_swap_map_try_ssd_cluster() above.
1117                  */                              610                  */
1118                 scan_base = offset = si->lowe    611                 scan_base = offset = si->lowest_bit;
1119                 last_in_cluster = offset + SW    612                 last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
1120                                                  613 
1121                 /* Locate the first empty (un    614                 /* Locate the first empty (unaligned) cluster */
1122                 for (; last_in_cluster <= REA !! 615                 for (; last_in_cluster <= si->highest_bit; offset++) {
1123                         if (si->swap_map[offs    616                         if (si->swap_map[offset])
1124                                 last_in_clust    617                                 last_in_cluster = offset + SWAPFILE_CLUSTER;
1125                         else if (offset == la    618                         else if (offset == last_in_cluster) {
1126                                 spin_lock(&si    619                                 spin_lock(&si->lock);
1127                                 offset -= SWA    620                                 offset -= SWAPFILE_CLUSTER - 1;
1128                                 si->cluster_n    621                                 si->cluster_next = offset;
1129                                 si->cluster_n    622                                 si->cluster_nr = SWAPFILE_CLUSTER - 1;
1130                                 goto checks;     623                                 goto checks;
1131                         }                        624                         }
1132                         if (unlikely(--latenc    625                         if (unlikely(--latency_ration < 0)) {
1133                                 cond_resched(    626                                 cond_resched();
1134                                 latency_ratio    627                                 latency_ration = LATENCY_LIMIT;
1135                         }                        628                         }
1136                 }                                629                 }
1137                                                  630 
1138                 offset = scan_base;              631                 offset = scan_base;
1139                 spin_lock(&si->lock);            632                 spin_lock(&si->lock);
1140                 si->cluster_nr = SWAPFILE_CLU    633                 si->cluster_nr = SWAPFILE_CLUSTER - 1;
1141         }                                        634         }
1142                                                  635 
1143 checks:                                          636 checks:
                                                   >> 637         if (si->cluster_info) {
                                                   >> 638                 while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
                                                   >> 639                 /* take a break if we already got some slots */
                                                   >> 640                         if (n_ret)
                                                   >> 641                                 goto done;
                                                   >> 642                         if (!scan_swap_map_try_ssd_cluster(si, &offset,
                                                   >> 643                                                         &scan_base))
                                                   >> 644                                 goto scan;
                                                   >> 645                 }
                                                   >> 646         }
1144         if (!(si->flags & SWP_WRITEOK))          647         if (!(si->flags & SWP_WRITEOK))
1145                 goto no_page;                    648                 goto no_page;
1146         if (!si->highest_bit)                    649         if (!si->highest_bit)
1147                 goto no_page;                    650                 goto no_page;
1148         if (offset > si->highest_bit)            651         if (offset > si->highest_bit)
1149                 scan_base = offset = si->lowe    652                 scan_base = offset = si->lowest_bit;
1150                                                  653 
                                                   >> 654         ci = lock_cluster(si, offset);
1151         /* reuse swap entry of cache-only swa    655         /* reuse swap entry of cache-only swap if not busy. */
1152         if (vm_swap_full() && si->swap_map[of    656         if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
1153                 int swap_was_freed;              657                 int swap_was_freed;
                                                   >> 658                 unlock_cluster(ci);
1154                 spin_unlock(&si->lock);          659                 spin_unlock(&si->lock);
1155                 swap_was_freed = __try_to_rec !! 660                 swap_was_freed = __try_to_reclaim_swap(si, offset);
1156                 spin_lock(&si->lock);            661                 spin_lock(&si->lock);
1157                 /* entry was freed successful    662                 /* entry was freed successfully, try to use this again */
1158                 if (swap_was_freed > 0)       !! 663                 if (swap_was_freed)
1159                         goto checks;             664                         goto checks;
1160                 goto scan; /* check next one     665                 goto scan; /* check next one */
1161         }                                        666         }
1162                                                  667 
1163         if (si->swap_map[offset]) {              668         if (si->swap_map[offset]) {
                                                   >> 669                 unlock_cluster(ci);
1164                 if (!n_ret)                      670                 if (!n_ret)
1165                         goto scan;               671                         goto scan;
1166                 else                             672                 else
1167                         goto done;               673                         goto done;
1168         }                                        674         }
1169         memset(si->swap_map + offset, usage,  !! 675         si->swap_map[offset] = usage;
                                                   >> 676         inc_cluster_info_page(si, si->cluster_info, offset);
                                                   >> 677         unlock_cluster(ci);
1170                                                  678 
1171         swap_range_alloc(si, offset, nr_pages !! 679         if (offset == si->lowest_bit)
                                                   >> 680                 si->lowest_bit++;
                                                   >> 681         if (offset == si->highest_bit)
                                                   >> 682                 si->highest_bit--;
                                                   >> 683         si->inuse_pages++;
                                                   >> 684         if (si->inuse_pages == si->pages) {
                                                   >> 685                 si->lowest_bit = si->max;
                                                   >> 686                 si->highest_bit = 0;
                                                   >> 687                 spin_lock(&swap_avail_lock);
                                                   >> 688                 plist_del(&si->avail_list, &swap_avail_head);
                                                   >> 689                 spin_unlock(&swap_avail_lock);
                                                   >> 690         }
                                                   >> 691         si->cluster_next = offset + 1;
1172         slots[n_ret++] = swp_entry(si->type,     692         slots[n_ret++] = swp_entry(si->type, offset);
1173                                                  693 
1174         /* got enough slots or reach max slot    694         /* got enough slots or reach max slots? */
1175         if ((n_ret == nr) || (offset >= si->h    695         if ((n_ret == nr) || (offset >= si->highest_bit))
1176                 goto done;                       696                 goto done;
1177                                                  697 
1178         /* search for next available slot */     698         /* search for next available slot */
1179                                                  699 
1180         /* time to take a break? */              700         /* time to take a break? */
1181         if (unlikely(--latency_ration < 0)) {    701         if (unlikely(--latency_ration < 0)) {
1182                 if (n_ret)                       702                 if (n_ret)
1183                         goto done;               703                         goto done;
1184                 spin_unlock(&si->lock);          704                 spin_unlock(&si->lock);
1185                 cond_resched();                  705                 cond_resched();
1186                 spin_lock(&si->lock);            706                 spin_lock(&si->lock);
1187                 latency_ration = LATENCY_LIMI    707                 latency_ration = LATENCY_LIMIT;
1188         }                                        708         }
1189                                                  709 
1190         if (si->cluster_nr && !si->swap_map[+ !! 710         /* try to get more slots in cluster */
1191                 /* non-ssd case, still more s !! 711         if (si->cluster_info) {
1192                 --si->cluster_nr;             !! 712                 if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
1193                 goto checks;                  !! 713                         goto checks;
                                                   >> 714                 else
                                                   >> 715                         goto done;
1194         }                                        716         }
                                                   >> 717         /* non-ssd case */
                                                   >> 718         ++offset;
1195                                                  719 
1196         /*                                    !! 720         /* non-ssd case, still more slots in cluster? */
1197          * Even if there are no free clusters available (fragmented),  !! 721         if (si->cluster_nr && !si->swap_map[offset]) {
1198          * try to scan a little more quickly with the lock held unless !! 722                 --si->cluster_nr;
1199          * we have scanned too many slots already.                     !! 723                 goto checks;
1200          */                                   << 
1201         if (!scanned_many) {                  << 
1202                 unsigned long scan_limit;     << 
1203                                               << 
1204                 if (offset < scan_base)       << 
1205                         scan_limit = scan_bas << 
1206                 else                          << 
1207                         scan_limit = si->high << 
1208                 for (; offset <= scan_limit & << 
1209                      offset++) {              << 
1210                         if (!si->swap_map[off << 
1211                                 goto checks;  << 
1212                 }                             << 
1213         }                                        724         }
1214                                                  725 
1215 done:                                            726 done:
1216         if (order == 0)                       << 
1217                 set_cluster_next(si, offset + << 
1218         si->flags -= SWP_SCANNING;               727         si->flags -= SWP_SCANNING;
1219         return n_ret;                            728         return n_ret;
1220                                                  729 
1221 scan:                                            730 scan:
1222         VM_WARN_ON(order > 0);                << 
1223         spin_unlock(&si->lock);                  731         spin_unlock(&si->lock);
1224         while (++offset <= READ_ONCE(si->high !! 732         while (++offset <= si->highest_bit) {
                                                   >> 733                 if (!si->swap_map[offset]) {
                                                   >> 734                         spin_lock(&si->lock);
                                                   >> 735                         goto checks;
                                                   >> 736                 }
                                                   >> 737                 if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
                                                   >> 738                         spin_lock(&si->lock);
                                                   >> 739                         goto checks;
                                                   >> 740                 }
1225                 if (unlikely(--latency_ration    741                 if (unlikely(--latency_ration < 0)) {
1226                         cond_resched();          742                         cond_resched();
1227                         latency_ration = LATE    743                         latency_ration = LATENCY_LIMIT;
1228                         scanned_many = true;  << 
1229                 }                                744                 }
1230                 if (swap_offset_available_and << 
1231                         goto checks;          << 
1232         }                                        745         }
1233         offset = si->lowest_bit;                 746         offset = si->lowest_bit;
1234         while (offset < scan_base) {             747         while (offset < scan_base) {
                                                   >> 748                 if (!si->swap_map[offset]) {
                                                   >> 749                         spin_lock(&si->lock);
                                                   >> 750                         goto checks;
                                                   >> 751                 }
                                                   >> 752                 if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
                                                   >> 753                         spin_lock(&si->lock);
                                                   >> 754                         goto checks;
                                                   >> 755                 }
1235                 if (unlikely(--latency_ration    756                 if (unlikely(--latency_ration < 0)) {
1236                         cond_resched();          757                         cond_resched();
1237                         latency_ration = LATE    758                         latency_ration = LATENCY_LIMIT;
1238                         scanned_many = true;  << 
1239                 }                                759                 }
1240                 if (swap_offset_available_and << 
1241                         goto checks;          << 
1242                 offset++;                        760                 offset++;
1243         }                                        761         }
1244         spin_lock(&si->lock);                    762         spin_lock(&si->lock);
1245                                                  763 
1246 no_page:                                         764 no_page:
1247         si->flags -= SWP_SCANNING;               765         si->flags -= SWP_SCANNING;
1248         return n_ret;                            766         return n_ret;
1249 }                                                767 }
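
For rotational disks (the path above taken when si->cluster_info is NULL), scan_swap_map_slots() hands out slots sequentially until SWAPFILE_CLUSTER of them have gone by, then hunts for the first fully empty, unaligned run to start a new cluster, keeping allocations physically grouped. That "locate the first empty (unaligned) cluster" loop can be reproduced in standalone C; a sketch with invented sizes and names:

/* toy_hdd.c - sketch of the rotational-disk empty-run search in
 * scan_swap_map_slots(); sizes and names are invented for the demo.
 */
#include <stdio.h>
#include <string.h>

#define TOY_CLUSTER 8
#define TOY_SLOTS   64

/* Track the end of the current candidate run and push it past any busy
 * slot, mirroring the last_in_cluster bookkeeping in the kernel loop. */
static int toy_find_empty_run(const unsigned char *map, int lowest, int highest)
{
        int last_in_cluster = lowest + TOY_CLUSTER - 1;

        for (int off = lowest; last_in_cluster <= highest; off++) {
                if (map[off])
                        last_in_cluster = off + TOY_CLUSTER;
                else if (off == last_in_cluster)
                        return off - (TOY_CLUSTER - 1);
        }
        return -1;              /* no empty run: fall back to first-free scan */
}

int main(void)
{
        unsigned char map[TOY_SLOTS];

        memset(map, 0, sizeof(map));
        memset(map, 1, 12);     /* first 12 slots busy */
        printf("new cluster starts at %d\n",
               toy_find_empty_run(map, 0, TOY_SLOTS - 1));      /* -> 12 */
        return 0;
}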
1250                                                  768 
1251 int get_swap_pages(int n_goal, swp_entry_t sw !! 769 static unsigned long scan_swap_map(struct swap_info_struct *si,
                                                   >> 770                                    unsigned char usage)
                                                   >> 771 {
                                                   >> 772         swp_entry_t entry;
                                                   >> 773         int n_ret;
                                                   >> 774 
                                                   >> 775         n_ret = scan_swap_map_slots(si, usage, 1, &entry);
                                                   >> 776 
                                                   >> 777         if (n_ret)
                                                   >> 778                 return swp_offset(entry);
                                                   >> 779         else
                                                   >> 780                 return 0;
                                                   >> 781 
                                                   >> 782 }
                                                   >> 783 
                                                   >> 784 int get_swap_pages(int n_goal, swp_entry_t swp_entries[])
1252 {                                                785 {
1253         int order = swap_entry_order(entry_or << 
1254         unsigned long size = 1 << order;      << 
1255         struct swap_info_struct *si, *next;      786         struct swap_info_struct *si, *next;
1256         long avail_pgs;                          787         long avail_pgs;
1257         int n_ret = 0;                           788         int n_ret = 0;
1258         int node;                             << 
1259                                                  789 
1260         spin_lock(&swap_avail_lock);          !! 790         avail_pgs = atomic_long_read(&nr_swap_pages);
1261                                               !! 791         if (avail_pgs <= 0)
1262         avail_pgs = atomic_long_read(&nr_swap << 
1263         if (avail_pgs <= 0) {                 << 
1264                 spin_unlock(&swap_avail_lock) << 
1265                 goto noswap;                     792                 goto noswap;
1266         }                                     << 
1267                                                  793 
1268         n_goal = min3((long)n_goal, (long)SWA !! 794         if (n_goal > SWAP_BATCH)
                                                   >> 795                 n_goal = SWAP_BATCH;
                                                   >> 796 
                                                   >> 797         if (n_goal > avail_pgs)
                                                   >> 798                 n_goal = avail_pgs;
1269                                                  799 
1270         atomic_long_sub(n_goal * size, &nr_sw !! 800         atomic_long_sub(n_goal, &nr_swap_pages);
                                                   >> 801 
                                                   >> 802         spin_lock(&swap_avail_lock);
1271                                                  803 
1272 start_over:                                      804 start_over:
1273         node = numa_node_id();                !! 805         plist_for_each_entry_safe(si, next, &swap_avail_head, avail_list) {
1274         plist_for_each_entry_safe(si, next, & << 
1275                 /* requeue si to after same-p    806                 /* requeue si to after same-priority siblings */
1276                 plist_requeue(&si->avail_list !! 807                 plist_requeue(&si->avail_list, &swap_avail_head);
1277                 spin_unlock(&swap_avail_lock)    808                 spin_unlock(&swap_avail_lock);
1278                 spin_lock(&si->lock);            809                 spin_lock(&si->lock);
1279                 if (!si->highest_bit || !(si-    810                 if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
1280                         spin_lock(&swap_avail    811                         spin_lock(&swap_avail_lock);
1281                         if (plist_node_empty( !! 812                         if (plist_node_empty(&si->avail_list)) {
1282                                 spin_unlock(&    813                                 spin_unlock(&si->lock);
1283                                 goto nextsi;     814                                 goto nextsi;
1284                         }                        815                         }
1285                         WARN(!si->highest_bit    816                         WARN(!si->highest_bit,
1286                              "swap_info %d in    817                              "swap_info %d in list but !highest_bit\n",
1287                              si->type);          818                              si->type);
1288                         WARN(!(si->flags & SW    819                         WARN(!(si->flags & SWP_WRITEOK),
1289                              "swap_info %d in    820                              "swap_info %d in list but !SWP_WRITEOK\n",
1290                              si->type);          821                              si->type);
1291                         __del_from_avail_list(si);   !! 822                         plist_del(&si->avail_list, &swap_avail_head);
1292                         spin_unlock(&si->lock    823                         spin_unlock(&si->lock);
1293                         goto nextsi;             824                         goto nextsi;
1294                 }                                825                 }
1295                 n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,    826                 n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
1296                                             n_goal, swp_entries, order);   !! 827                                             n_goal, swp_entries);
1297                 spin_unlock(&si->lock);          828                 spin_unlock(&si->lock);
1298                 if (n_ret || size > 1)        !! 829                 if (n_ret)
1299                         goto check_out;          830                         goto check_out;
1300                 cond_resched();               !! 831                 pr_debug("scan_swap_map of si %d failed to find offset\n",
                                                   >> 832                         si->type);
1301                                                  833 
1302                 spin_lock(&swap_avail_lock);     834                 spin_lock(&swap_avail_lock);
1303 nextsi:                                          835 nextsi:
1304                 /*                               836                 /*
1305                  * if we got here, it's likely that si was almost full before,    837                  * if we got here, it's likely that si was almost full before,
1306                  * and since scan_swap_map_slots() can drop the si->lock,   !! 838                  * and since scan_swap_map() can drop the si->lock, multiple
1307                  * multiple callers probably all tried to get a page from the   !! 839                  * callers probably all tried to get a page from the same si
1308                  * same si and it filled up before we could get one; or, the si   !! 840                  * and it filled up before we could get one; or, the si filled
1309                  * filled up between us dropping swap_avail_lock and taking   !! 841                  * up between us dropping swap_avail_lock and taking si->lock.
1310                  * si->lock. Since we dropped the swap_avail_lock, the   !! 842                  * Since we dropped the swap_avail_lock, the swap_avail_head
1311                  * swap_avail_head list may have been modified; so if next is   !! 843                  * list may have been modified; so if next is still in the
1312                  * still in the swap_avail_head list then try it, otherwise   !! 844                  * swap_avail_head list then try it, otherwise start over
1313                  * start over if we have not gotten any slots.   !! 845                  * if we have not gotten any slots.
1314                  */                              846                  */
1315                 if (plist_node_empty(&next->avail_lists[node]))   !! 847                 if (plist_node_empty(&next->avail_list))
1316                         goto start_over;         848                         goto start_over;
1317         }                                        849         }
1318                                                  850 
1319         spin_unlock(&swap_avail_lock);           851         spin_unlock(&swap_avail_lock);
1320                                                  852 
1321 check_out:                                       853 check_out:
1322         if (n_ret < n_goal)                      854         if (n_ret < n_goal)
1323                 atomic_long_add((long)(n_goal - n_ret) * size,   !! 855                 atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages);
1324                                 &nr_swap_pages);   << 
1325 noswap:                                          856 noswap:
1326         return n_ret;                            857         return n_ret;
1327 }                                                858 }
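
A caller-side sketch of the 6.12 entry point above (the wrapper name is hypothetical; upstream callers such as folio_alloc_swap() take this shape). get_swap_pages() clamps n_goal against SWAP_BATCH and the remaining nr_swap_pages / (1 << order), and returns how many slots it reserved:

	/* Hypothetical wrapper: reserve one swap slot of the folio's order.
	 * Slots come back already marked SWAP_HAS_CACHE. */
	static swp_entry_t sketch_folio_alloc_swap(struct folio *folio)
	{
		swp_entry_t entry = { .val = 0 };

		if (!get_swap_pages(1, &entry, folio_order(folio)))
			entry.val = 0;	/* all devices full or !SWP_WRITEOK */
		return entry;
	}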
1328                                                  859 
1329 static struct swap_info_struct *_swap_info_get(swp_entry_t entry)   !! 860 /* The only caller of this function is now suspend routine */
                                                   >> 861 swp_entry_t get_swap_page_of_type(int type)
1330 {                                                862 {
1331         struct swap_info_struct *si;             863         struct swap_info_struct *si;
1332         unsigned long offset;                 !! 864         pgoff_t offset;
                                                   >> 865 
                                                   >> 866         si = swap_info[type];
                                                   >> 867         spin_lock(&si->lock);
                                                   >> 868         if (si && (si->flags & SWP_WRITEOK)) {
                                                   >> 869                 atomic_long_dec(&nr_swap_pages);
                                                   >> 870                 /* This is called for allocating swap entry, not cache */
                                                   >> 871                 offset = scan_swap_map(si, 1);
                                                   >> 872                 if (offset) {
                                                   >> 873                         spin_unlock(&si->lock);
                                                   >> 874                         return swp_entry(type, offset);
                                                   >> 875                 }
                                                   >> 876                 atomic_long_inc(&nr_swap_pages);
                                                   >> 877         }
                                                   >> 878         spin_unlock(&si->lock);
                                                   >> 879         return (swp_entry_t) {0};
                                                   >> 880 }
                                                   >> 881 
                                                   >> 882 static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
                                                   >> 883 {
                                                   >> 884         struct swap_info_struct *p;
                                                   >> 885         unsigned long offset, type;
1333                                                  886 
1334         if (!entry.val)                          887         if (!entry.val)
1335                 goto out;                        888                 goto out;
1336         si = swp_swap_info(entry);            !! 889         type = swp_type(entry);
1337         if (!si)                              !! 890         if (type >= nr_swapfiles)
1338                 goto bad_nofile;                 891                 goto bad_nofile;
1339         if (data_race(!(si->flags & SWP_USED)))   !! 892         p = swap_info[type];
                                                   >> 893         if (!(p->flags & SWP_USED))
1340                 goto bad_device;                 894                 goto bad_device;
1341         offset = swp_offset(entry);              895         offset = swp_offset(entry);
1342         if (offset >= si->max)                !! 896         if (offset >= p->max)
1343                 goto bad_offset;                 897                 goto bad_offset;
1344         if (data_race(!si->swap_map[swp_offset(entry)]))   !! 898         return p;
1345                 goto bad_free;                << 
1346         return si;                            << 
1347                                                  899 
1348 bad_free:                                     << 
1349         pr_err("%s: %s%08lx\n", __func__, Unused_offset, entry.val);   << 
1350         goto out;                             << 
1351 bad_offset:                                      900 bad_offset:
1352         pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);   !! 901         pr_err("swap_info_get: %s%08lx\n", Bad_offset, entry.val);
1353         goto out;                                902         goto out;
1354 bad_device:                                      903 bad_device:
1355         pr_err("%s: %s%08lx\n", __func__, Unused_file, entry.val);   !! 904         pr_err("swap_info_get: %s%08lx\n", Unused_file, entry.val);
1356         goto out;                                905         goto out;
1357 bad_nofile:                                      906 bad_nofile:
1358         pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val);   !! 907         pr_err("swap_info_get: %s%08lx\n", Bad_file, entry.val);
                                                   >> 908 out:
                                                   >> 909         return NULL;
                                                   >> 910 }
                                                   >> 911 
                                                   >> 912 static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
                                                   >> 913 {
                                                   >> 914         struct swap_info_struct *p;
                                                   >> 915 
                                                   >> 916         p = __swap_info_get(entry);
                                                   >> 917         if (!p)
                                                   >> 918                 goto out;
                                                   >> 919         if (!p->swap_map[swp_offset(entry)])
                                                   >> 920                 goto bad_free;
                                                   >> 921         return p;
                                                   >> 922 
                                                   >> 923 bad_free:
                                                   >> 924         pr_err("swap_info_get: %s%08lx\n", Unused_offset, entry.val);
                                                   >> 925         goto out;
1359 out:                                             926 out:
1360         return NULL;                             927         return NULL;
1361 }                                                928 }
1362                                                  929 
                                                   >> 930 static struct swap_info_struct *swap_info_get(swp_entry_t entry)
                                                   >> 931 {
                                                   >> 932         struct swap_info_struct *p;
                                                   >> 933 
                                                   >> 934         p = _swap_info_get(entry);
                                                   >> 935         if (p)
                                                   >> 936                 spin_lock(&p->lock);
                                                   >> 937         return p;
                                                   >> 938 }
                                                   >> 939 
1363 static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,    940 static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,
1364                                         struc    941                                         struct swap_info_struct *q)
1365 {                                                942 {
1366         struct swap_info_struct *p;              943         struct swap_info_struct *p;
1367                                                  944 
1368         p = _swap_info_get(entry);               945         p = _swap_info_get(entry);
1369                                                  946 
1370         if (p != q) {                            947         if (p != q) {
1371                 if (q != NULL)                   948                 if (q != NULL)
1372                         spin_unlock(&q->lock)    949                         spin_unlock(&q->lock);
1373                 if (p != NULL)                   950                 if (p != NULL)
1374                         spin_lock(&p->lock);     951                         spin_lock(&p->lock);
1375         }                                        952         }
1376         return p;                                953         return p;
1377 }                                                954 }
1378                                                  955 
1379 static unsigned char __swap_entry_free_locked(struct swap_info_struct *si,   !! 956 static unsigned char __swap_entry_free(struct swap_info_struct *p,
1380                                               unsigned long offset,   !! 957                                        swp_entry_t entry, unsigned char usage)
1381                                               unsigned char usage)   << 
1382 {                                                958 {
                                                   >> 959         struct swap_cluster_info *ci;
                                                   >> 960         unsigned long offset = swp_offset(entry);
1383         unsigned char count;                     961         unsigned char count;
1384         unsigned char has_cache;                 962         unsigned char has_cache;
1385                                                  963 
1386         count = si->swap_map[offset];         !! 964         ci = lock_cluster_or_swap_info(p, offset);
                                                   >> 965 
                                                   >> 966         count = p->swap_map[offset];
1387                                                  967 
1388         has_cache = count & SWAP_HAS_CACHE;      968         has_cache = count & SWAP_HAS_CACHE;
1389         count &= ~SWAP_HAS_CACHE;                969         count &= ~SWAP_HAS_CACHE;
1390                                                  970 
1391         if (usage == SWAP_HAS_CACHE) {           971         if (usage == SWAP_HAS_CACHE) {
1392                 VM_BUG_ON(!has_cache);           972                 VM_BUG_ON(!has_cache);
1393                 has_cache = 0;                   973                 has_cache = 0;
1394         } else if (count == SWAP_MAP_SHMEM) {    974         } else if (count == SWAP_MAP_SHMEM) {
1395                 /*                               975                 /*
1396                  * Or we could insist on shme    976                  * Or we could insist on shmem.c using a special
1397                  * swap_shmem_free() and free    977                  * swap_shmem_free() and free_shmem_swap_and_cache()...
1398                  */                              978                  */
1399                 count = 0;                       979                 count = 0;
1400         } else if ((count & ~COUNT_CONTINUED)    980         } else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
1401                 if (count == COUNT_CONTINUED)    981                 if (count == COUNT_CONTINUED) {
1402                         if (swap_count_continued(si, offset, count))   !! 982                         if (swap_count_continued(p, offset, count))
1403                                 count = SWAP_    983                                 count = SWAP_MAP_MAX | COUNT_CONTINUED;
1404                         else                     984                         else
1405                                 count = SWAP_    985                                 count = SWAP_MAP_MAX;
1406                 } else                           986                 } else
1407                         count--;                 987                         count--;
1408         }                                        988         }
1409                                                  989 
1410         usage = count | has_cache;               990         usage = count | has_cache;
1411         if (usage)                            !! 991         p->swap_map[offset] = usage ? : SWAP_HAS_CACHE;
1412                 WRITE_ONCE(si->swap_map[offset], usage);   << 
1413         else                                  << 
1414                 WRITE_ONCE(si->swap_map[offset], SWAP_HAS_CACHE);   << 
1415                                                  992 
1416         return usage;                         !! 993         unlock_cluster_or_swap_info(p, ci);
1417 }                                             << 
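
The byte __swap_entry_free_locked() rewrites packs a reference count together with the SWAP_HAS_CACHE flag. A standalone model of the two common release paths (constants copied from the upstream headers; the SWAP_MAP_SHMEM and COUNT_CONTINUED cases are omitted):

	#include <assert.h>

	#define SWAP_HAS_CACHE	0x40	/* entry also backs a swap-cache folio */

	int main(void)
	{
		unsigned char map = 2 | SWAP_HAS_CACHE;	/* two pte refs + cache */
		unsigned char count = map & ~SWAP_HAS_CACHE;
		unsigned char has_cache = map & SWAP_HAS_CACHE;

		count--;			/* usage == 1: drop one pte ref */
		map = count | has_cache;
		assert(map == (1 | SWAP_HAS_CACHE));

		has_cache = 0;			/* usage == SWAP_HAS_CACHE */
		map = count | has_cache;
		assert(map == 1);		/* one pte ref left: not yet free */
		return 0;
	}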
1418                                               << 
1419 /*                                            << 
1420  * When we get a swap entry, if there aren't some other ways to   << 
1421  * prevent swapoff, such as the folio in swap cache is locked, RCU   << 
1422  * reader side is locked, etc., the swap entry may become invalid   << 
1423  * because of swapoff.  Then, we need to enclose all swap related   << 
1424  * functions with get_swap_device() and put_swap_device(), unless the   << 
1425  * swap functions call get/put_swap_device() by themselves.   << 
1426  *                                            << 
1427  * RCU reader side lock (including any spinlock) is sufficient to   << 
1428  * prevent swapoff, because synchronize_rcu() is called in swapoff()   << 
1429  * before freeing data structures.            << 
1430  *                                            << 
1431  * Check whether swap entry is valid in the swap device.  If so,   << 
1432  * return pointer to swap_info_struct, and keep the swap entry valid   << 
1433  * via preventing the swap device from being swapoff, until   << 
1434  * put_swap_device() is called.  Otherwise return NULL.   << 
1435  *                                            << 
1436  * Notice that swapoff or swapoff+swapon can still happen before the   << 
1437  * percpu_ref_tryget_live() in get_swap_device() or after the   << 
1438  * percpu_ref_put() in put_swap_device() if there isn't any other way   << 
1439  * to prevent swapoff.  The caller must be prepared for that.  For   << 
1440  * example, the following situation is possible.   << 
1441  *                                            << 
1442  *   CPU1                               CPU2  << 
1443  *   do_swap_page()                           << 
1444  *     ...                              swapoff+swapon   << 
1445  *     __read_swap_cache_async()              << 
1446  *       swapcache_prepare()                  << 
1447  *         __swap_duplicate()                 << 
1448  *           // check swap_map                << 
1449  *     // verify PTE not changed              << 
1450  *                                            << 
1451  * In __swap_duplicate(), the swap_map need to be checked before   << 
1452  * changing partly because the specified swap entry may be for another   << 
1453  * swap device which has been swapoff.  And in do_swap_page(), after   << 
1454  * the page is read from the swap device, the PTE is verified not   << 
1455  * changed with the page table locked to check whether the swap device   << 
1456  * has been swapoff or swapoff+swapon.        << 
1457  */                                           << 
1458 struct swap_info_struct *get_swap_device(swp_entry_t entry)   << 
1459 {                                             << 
1460         struct swap_info_struct *si;          << 
1461         unsigned long offset;                 << 
1462                                               << 
1463         if (!entry.val)                       << 
1464                 goto out;                     << 
1465         si = swp_swap_info(entry);            << 
1466         if (!si)                              << 
1467                 goto bad_nofile;              << 
1468         if (!percpu_ref_tryget_live(&si->users))   << 
1469                 goto out;                     << 
1470         /*                                    << 
1471          * Guarantee the si->users are checked before fetching the rest   << 
1472          * fields of swap_info_struct.        << 
1473          *                                    << 
1474          * Paired with the spin_unlock() after setup_swap_info() in   << 
1475          * enable_swap_info().                << 
1476          */                                   << 
1477         smp_rmb();                            << 
1478         offset = swp_offset(entry);           << 
1479         if (offset >= si->max)                << 
1480                 goto put_out;                 << 
1481                                               << 
1482         return si;                            << 
1483 bad_nofile:                                   << 
1484         pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val);   << 
1485 out:                                          << 
1486         return NULL;                          << 
1487 put_out:                                      << 
1488         pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);   << 
1489         percpu_ref_put(&si->users);           << 
1490         return NULL;                          << 
1491 }                                             << 
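
A sketch of the calling convention the comment block above documents (the helper name is hypothetical; swap_count() is the file-local accessor that masks off SWAP_HAS_CACHE):

	/* Hypothetical reader: pin the device across the swap_map access. */
	static int sketch_peek_swap_count(swp_entry_t entry)
	{
		struct swap_info_struct *si;
		int count = 0;

		si = get_swap_device(entry);	/* percpu ref on si->users, or NULL */
		if (si) {
			count = swap_count(READ_ONCE(si->swap_map[swp_offset(entry)]));
			put_swap_device(si);	/* after this, swapoff may free si */
		}
		return count;
	}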
1492                                               << 
1493 static unsigned char __swap_entry_free(struct swap_info_struct *si,   << 
1494                                        swp_entry_t entry)   << 
1495 {                                             << 
1496         struct swap_cluster_info *ci;         << 
1497         unsigned long offset = swp_offset(ent << 
1498         unsigned char usage;                  << 
1499                                               << 
1500         ci = lock_cluster_or_swap_info(si, offset);   << 
1501         usage = __swap_entry_free_locked(si, offset, 1);   << 
1502         unlock_cluster_or_swap_info(si, ci);  << 
1503         if (!usage)                           << 
1504                 free_swap_slot(entry);        << 
1505                                                  994 
1506         return usage;                            995         return usage;
1507 }                                                996 }
1508                                                  997 
1509 static bool __swap_entries_free(struct swap_info_struct *si,   !! 998 static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry)
1510                 swp_entry_t entry, int nr)    << 
1511 {                                                999 {
1512         unsigned long offset = swp_offset(entry);   << 
1513         unsigned int type = swp_type(entry);  << 
1514         struct swap_cluster_info *ci;            1000         struct swap_cluster_info *ci;
1515         bool has_cache = false;               << 
1516         unsigned char count;                  << 
1517         int i;                                << 
1518                                               << 
1519         if (nr <= 1 || swap_count(data_race(si->swap_map[offset])) != 1)   << 
1520                 goto fallback;                << 
1521         /* cross into another cluster */      << 
1522         if (nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER)   << 
1523                 goto fallback;                << 
1524                                               << 
1525         ci = lock_cluster_or_swap_info(si, offset);   << 
1526         if (!swap_is_last_map(si, offset, nr, &has_cache)) {   << 
1527                 unlock_cluster_or_swap_info(si, ci);   << 
1528                 goto fallback;                << 
1529         }                                     << 
1530         for (i = 0; i < nr; i++)              << 
1531                 WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);   << 
1532         unlock_cluster_or_swap_info(si, ci);  << 
1533                                               << 
1534         if (!has_cache) {                     << 
1535                 for (i = 0; i < nr; i++)      << 
1536                         zswap_invalidate(swp_entry(type, offset + i));   << 
1537                 spin_lock(&si->lock);         << 
1538                 swap_entry_range_free(si, entry, nr);   << 
1539                 spin_unlock(&si->lock);       << 
1540         }                                     << 
1541         return has_cache;                     << 
1542                                               << 
1543 fallback:                                     << 
1544         for (i = 0; i < nr; i++) {            << 
1545                 if (data_race(si->swap_map[offset + i])) {   << 
1546                         count = __swap_entry_free(si, swp_entry(type, offset + i));   << 
1547                         if (count == SWAP_HAS_CACHE)   << 
1548                                 has_cache = t << 
1549                 } else {                      << 
1550                         WARN_ON_ONCE(1);      << 
1551                 }                             << 
1552         }                                     << 
1553         return has_cache;                     << 
1554 }                                             << 
1555                                               << 
1556 /*                                            << 
1557  * Drop the last HAS_CACHE flag of swap entries, the caller has to   << 
1558  * ensure all entries belong to the same cgroup.   << 
1559  */                                           << 
1560 static void swap_entry_range_free(struct swap_info_struct *si, swp_entry_t entry,   << 
1561                                   unsigned int nr_pages)   << 
1562 {                                             << 
1563         unsigned long offset = swp_offset(ent    1001         unsigned long offset = swp_offset(entry);
1564         unsigned char *map = si->swap_map + offset;   !! 1002         unsigned char count;
1565         unsigned char *map_end = map + nr_pages;   << 
1566         struct swap_cluster_info *ci;         << 
1567                                                  1003 
1568         ci = lock_cluster(si, offset);        !! 1004         ci = lock_cluster(p, offset);
1569         do {                                  !! 1005         count = p->swap_map[offset];
1570                 VM_BUG_ON(*map != SWAP_HAS_CACHE);   !! 1006         VM_BUG_ON(count != SWAP_HAS_CACHE);
1571                 *map = 0;                     !! 1007         p->swap_map[offset] = 0;
1572         } while (++map < map_end);            !! 1008         dec_cluster_info_page(p, p->cluster_info, offset);
1573         dec_cluster_info_page(si, ci, nr_pages);   << 
1574         unlock_cluster(ci);                      1009         unlock_cluster(ci);
1575                                                  1010 
1576         mem_cgroup_uncharge_swap(entry, nr_pages);   !! 1011         mem_cgroup_uncharge_swap(entry);
1577         swap_range_free(si, offset, nr_pages);   !! 1012         if (offset < p->lowest_bit)
1578 }                                             !! 1013                 p->lowest_bit = offset;
1579                                               !! 1014         if (offset > p->highest_bit) {
1580 static void cluster_swap_free_nr(struct swap_info_struct *si,   !! 1015                 bool was_full = !p->highest_bit;
1581                 unsigned long offset, int nr_pages,   << 
1582                 unsigned char usage)          << 
1583 {                                             << 
1584         struct swap_cluster_info *ci;         << 
1585         DECLARE_BITMAP(to_free, BITS_PER_LONG) = { 0 };   << 
1586         int i, nr;                            << 
1587                                                  1016 
1588         ci = lock_cluster_or_swap_info(si, offset);   !! 1017                 p->highest_bit = offset;
1589         while (nr_pages) {                    !! 1018                 if (was_full && (p->flags & SWP_WRITEOK)) {
1590                 nr = min(BITS_PER_LONG, nr_pages);   !! 1019                         spin_lock(&swap_avail_lock);
1591                 for (i = 0; i < nr; i++) {    !! 1020                         WARN_ON(!plist_node_empty(&p->avail_list));
1592                         if (!__swap_entry_free_locked(si, offset + i, usage))   !! 1021                         if (plist_node_empty(&p->avail_list))
1593                                 bitmap_set(to_free, i, 1);   !! 1022                                 plist_add(&p->avail_list,
1594                 }                             !! 1023                                           &swap_avail_head);
1595                 if (!bitmap_empty(to_free, BITS_PER_LONG)) {   !! 1024                         spin_unlock(&swap_avail_lock);
1596                         unlock_cluster_or_swap_info(si, ci);   !! 1025                 }
1597                         for_each_set_bit(i, to_free, BITS_PER_LONG)   !! 1026         }
1598                                 free_swap_slot(swp_entry(si->type, offset + i));   !! 1027         atomic_long_inc(&nr_swap_pages);
1599                         if (nr == nr_pages)   !! 1028         p->inuse_pages--;
1600                                 return;       !! 1029         frontswap_invalidate_page(p->type, offset);
1601                         bitmap_clear(to_free, BITS_PER_LONG);   !! 1030         if (p->flags & SWP_BLKDEV) {
1602                         ci = lock_cluster_or_swap_info(si, offset + nr);   !! 1031                 struct gendisk *disk = p->bdev->bd_disk;
1603                 }                             !! 1032 
1604                 offset += nr;                 !! 1033                 if (disk->fops->swap_slot_free_notify)
1605                 nr_pages -= nr;               !! 1034                         disk->fops->swap_slot_free_notify(p->bdev,
                                                   >> 1035                                                           offset);
1606         }                                        1036         }
1607         unlock_cluster_or_swap_info(si, ci);  << 
1608 }                                                1037 }
1609                                                  1038 
1610 /*                                               1039 /*
1611  * Caller has made sure that the swap device     1040  * Caller has made sure that the swap device corresponding to entry
1612  * is still around or has not been recycled.     1041  * is still around or has not been recycled.
1613  */                                              1042  */
1614 void swap_free_nr(swp_entry_t entry, int nr_pages)   !! 1043 void swap_free(swp_entry_t entry)
1615 {                                                1044 {
1616         int nr;                               !! 1045         struct swap_info_struct *p;
1617         struct swap_info_struct *sis;         << 
1618         unsigned long offset = swp_offset(entry);   << 
1619                                               << 
1620         sis = _swap_info_get(entry);          << 
1621         if (!sis)                             << 
1622                 return;                       << 
1623                                                  1046 
1624         while (nr_pages) {                    !! 1047         p = _swap_info_get(entry);
1625                 nr = min_t(int, nr_pages, SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);   !! 1048         if (p) {
1626                 cluster_swap_free_nr(sis, offset, nr, 1);   !! 1049                 if (!__swap_entry_free(p, entry, 1))
1627                 offset += nr;                 !! 1050                         free_swap_slot(entry);
1628                 nr_pages -= nr;               << 
1629         }                                        1051         }
1630 }                                                1052 }
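
The min_t() clamp above splits a free into per-cluster chunks so no call into cluster_swap_free_nr() crosses a cluster boundary. A standalone model of that chunking, assuming SWAPFILE_CLUSTER == 512 (HPAGE_PMD_NR on x86-64 with CONFIG_THP_SWAP; 256 otherwise):

	#include <stdio.h>

	#define SWAPFILE_CLUSTER 512UL	/* assumed cluster size */

	int main(void)
	{
		unsigned long offset = 500, nr_pages = 1000, nr;

		while (nr_pages) {
			nr = nr_pages;
			if (nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER)
				nr = SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER;
			/* prints [500, 512), [512, 1024), [1024, 1500) */
			printf("free [%lu, %lu)\n", offset, offset + nr);
			offset += nr;
			nr_pages -= nr;
		}
		return 0;
	}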
1631                                                  1053 
1632 /*                                               1054 /*
1633  * Called after dropping swapcache to decreas    1055  * Called after dropping swapcache to decrease refcnt to swap entries.
1634  */                                              1056  */
1635 void put_swap_folio(struct folio *folio, swp_entry_t entry)   !! 1057 void swapcache_free(swp_entry_t entry)
1636 {                                                1058 {
1637         unsigned long offset = swp_offset(entry);   !! 1059         struct swap_info_struct *p;
1638         struct swap_cluster_info *ci;         << 
1639         struct swap_info_struct *si;          << 
1640         int size = 1 << swap_entry_order(folio_order(folio));   << 
1641                                               << 
1642         si = _swap_info_get(entry);           << 
1643         if (!si)                              << 
1644                 return;                       << 
1645                                                  1060 
1646         ci = lock_cluster_or_swap_info(si, offset);   !! 1061         p = _swap_info_get(entry);
1647         if (size > 1 && swap_is_has_cache(si, offset, size)) {   !! 1062         if (p) {
1648                 unlock_cluster_or_swap_info(si, ci);   !! 1063                 if (!__swap_entry_free(p, entry, SWAP_HAS_CACHE))
1649                 spin_lock(&si->lock);         << 
1650                 swap_entry_range_free(si, entry, size);   << 
1651                 spin_unlock(&si->lock);       << 
1652                 return;                       << 
1653         }                                     << 
1654         for (int i = 0; i < size; i++, entry.val++) {   !! 
1655                 if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) {   !! 
1656                         unlock_cluster_or_swap_info(si, ci);   !! 
1657                         free_swap_slot(entry);    1064                         free_swap_slot(entry);
1658                         if (i == size - 1)    << 
1659                                 return;       << 
1660                         lock_cluster_or_swap_info(si, offset + i);   !! 
1661                 }                             << 
1662         }                                        1065         }
1663         unlock_cluster_or_swap_info(si, ci);  << 
1664 }                                             << 
1665                                               << 
1666 static int swp_entry_cmp(const void *ent1, const void *ent2)   << 
1667 {                                             << 
1668         const swp_entry_t *e1 = ent1, *e2 = ent2;   << 
1669                                               << 
1670         return (int)swp_type(*e1) - (int)swp_type(*e2);   << 
1671 }                                                1066 }
1672                                                  1067 
1673 void swapcache_free_entries(swp_entry_t *entr    1068 void swapcache_free_entries(swp_entry_t *entries, int n)
1674 {                                                1069 {
1675         struct swap_info_struct *p, *prev;       1070         struct swap_info_struct *p, *prev;
1676         int i;                                   1071         int i;
1677                                                  1072 
1678         if (n <= 0)                              1073         if (n <= 0)
1679                 return;                          1074                 return;
1680                                                  1075 
1681         prev = NULL;                             1076         prev = NULL;
1682         p = NULL;                                1077         p = NULL;
1683                                               << 
1684         /*                                    << 
1685          * Sort swap entries by swap device, so each lock is only taken once.   << 
1686          * nr_swapfiles isn't absolutely correct, but the overhead of sort() is   << 
1687          * so low that it isn't necessary to optimize further.   << 
1688          */                                   << 
1689         if (nr_swapfiles > 1)                 << 
1690                 sort(entries, n, sizeof(entries[0]), swp_entry_cmp, NULL);   << 
1691         for (i = 0; i < n; ++i) {                1078         for (i = 0; i < n; ++i) {
1692                 p = swap_info_get_cont(entrie    1079                 p = swap_info_get_cont(entries[i], prev);
1693                 if (p)                           1080                 if (p)
1694                         swap_entry_range_free(p, entries[i], 1);   !! 1081                         swap_entry_free(p, entries[i]);
1695                 prev = p;                        1082                 prev = p;
1696         }                                        1083         }
1697         if (p)                                   1084         if (p)
1698                 spin_unlock(&p->lock);           1085                 spin_unlock(&p->lock);
1699 }                                                1086 }
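
Sorting the batch by device means the p != prev hand-off in swap_info_get_cont() switches si->lock once per device instead of potentially once per entry. A userspace model of the comparator (the type/offset bit split below is an assumption for illustration; the real layout lives in swapops.h):

	#include <stdlib.h>

	typedef struct { unsigned long val; } swp_entry_t;
	#define SWP_TYPE_SHIFT 58	/* assumed bit split */

	static unsigned swp_type(swp_entry_t e) { return e.val >> SWP_TYPE_SHIFT; }

	static int swp_entry_cmp(const void *a, const void *b)
	{
		const swp_entry_t *e1 = a, *e2 = b;
		return (int)swp_type(*e1) - (int)swp_type(*e2);
	}

	int main(void)
	{
		swp_entry_t e[3] = { { 2UL << SWP_TYPE_SHIFT },
				     { 0 },
				     { 1UL << SWP_TYPE_SHIFT } };

		qsort(e, 3, sizeof(e[0]), swp_entry_cmp);	/* devices 0, 1, 2 */
		return 0;
	}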
1700                                                  1087 
1701 int __swap_count(swp_entry_t entry)           !! 1088 /*
                                                   >> 1089  * How many references to page are currently swapped out?
                                                   >> 1090  * This does not give an exact answer when swap count is continued,
                                                   >> 1091  * but does include the high COUNT_CONTINUED flag to allow for that.
                                                   >> 1092  */
                                                   >> 1093 int page_swapcount(struct page *page)
1702 {                                                1094 {
1703         struct swap_info_struct *si = swp_swap_info(entry);   !! 1095         int count = 0;
                                                   >> 1096         struct swap_info_struct *p;
                                                   >> 1097         struct swap_cluster_info *ci;
                                                   >> 1098         swp_entry_t entry;
                                                   >> 1099         unsigned long offset;
                                                   >> 1100 
                                                   >> 1101         entry.val = page_private(page);
                                                   >> 1102         p = _swap_info_get(entry);
                                                   >> 1103         if (p) {
                                                   >> 1104                 offset = swp_offset(entry);
                                                   >> 1105                 ci = lock_cluster_or_swap_info(p, offset);
                                                   >> 1106                 count = swap_count(p->swap_map[offset]);
                                                   >> 1107                 unlock_cluster_or_swap_info(p, ci);
                                                   >> 1108         }
                                                   >> 1109         return count;
                                                   >> 1110 }
                                                   >> 1111 
                                                   >> 1112 static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
                                                   >> 1113 {
                                                   >> 1114         int count = 0;
1704         pgoff_t offset = swp_offset(entry);      1115         pgoff_t offset = swp_offset(entry);
                                                   >> 1116         struct swap_cluster_info *ci;
1705                                                  1117 
1706         return swap_count(si->swap_map[offset]);   !! 1118         ci = lock_cluster_or_swap_info(si, offset);
                                                   >> 1119         count = swap_count(si->swap_map[offset]);
                                                   >> 1120         unlock_cluster_or_swap_info(si, ci);
                                                   >> 1121         return count;
1707 }                                                1122 }
1708                                                  1123 
1709 /*                                               1124 /*
1710  * How many references to @entry are currentl    1125  * How many references to @entry are currently swapped out?
1711  * This does not give an exact answer when sw    1126  * This does not give an exact answer when swap count is continued,
1712  * but does include the high COUNT_CONTINUED     1127  * but does include the high COUNT_CONTINUED flag to allow for that.
1713  */                                              1128  */
1714 int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)   !! 1129 int __swp_swapcount(swp_entry_t entry)
1715 {                                                1130 {
1716         pgoff_t offset = swp_offset(entry);   !! 1131         int count = 0;
1717         struct swap_cluster_info *ci;         !! 1132         struct swap_info_struct *si;
1718         int count;                            << 
1719                                                  1133 
1720         ci = lock_cluster_or_swap_info(si, offset);   !! 1134         si = __swap_info_get(entry);
1721         count = swap_count(si->swap_map[offset]);   !! 1135         if (si)
1722         unlock_cluster_or_swap_info(si, ci);  !! 1136                 count = swap_swapcount(si, entry);
1723         return count;                            1137         return count;
1724 }                                                1138 }
1725                                                  1139 
1726 /*                                               1140 /*
1727  * How many references to @entry are currentl    1141  * How many references to @entry are currently swapped out?
1728  * This considers COUNT_CONTINUED so it retur    1142  * This considers COUNT_CONTINUED so it returns exact answer.
1729  */                                              1143  */
1730 int swp_swapcount(swp_entry_t entry)             1144 int swp_swapcount(swp_entry_t entry)
1731 {                                                1145 {
1732         int count, tmp_count, n;                 1146         int count, tmp_count, n;
1733         struct swap_info_struct *si;          !! 1147         struct swap_info_struct *p;
1734         struct swap_cluster_info *ci;            1148         struct swap_cluster_info *ci;
1735         struct page *page;                       1149         struct page *page;
1736         pgoff_t offset;                          1150         pgoff_t offset;
1737         unsigned char *map;                      1151         unsigned char *map;
1738                                                  1152 
1739         si = _swap_info_get(entry);           !! 1153         p = _swap_info_get(entry);
1740         if (!si)                              !! 1154         if (!p)
1741                 return 0;                        1155                 return 0;
1742                                                  1156 
1743         offset = swp_offset(entry);              1157         offset = swp_offset(entry);
1744                                                  1158 
1745         ci = lock_cluster_or_swap_info(si, offset);   !! 1159         ci = lock_cluster_or_swap_info(p, offset);
1746                                                  1160 
1747         count = swap_count(si->swap_map[offset]);   !! 1161         count = swap_count(p->swap_map[offset]);
1748         if (!(count & COUNT_CONTINUED))          1162         if (!(count & COUNT_CONTINUED))
1749                 goto out;                        1163                 goto out;
1750                                                  1164 
1751         count &= ~COUNT_CONTINUED;               1165         count &= ~COUNT_CONTINUED;
1752         n = SWAP_MAP_MAX + 1;                    1166         n = SWAP_MAP_MAX + 1;
1753                                                  1167 
1754         page = vmalloc_to_page(si->swap_map + offset);   !! 1168         page = vmalloc_to_page(p->swap_map + offset);
1755         offset &= ~PAGE_MASK;                    1169         offset &= ~PAGE_MASK;
1756         VM_BUG_ON(page_private(page) != SWP_C    1170         VM_BUG_ON(page_private(page) != SWP_CONTINUED);
1757                                                  1171 
1758         do {                                     1172         do {
1759                 page = list_next_entry(page,     1173                 page = list_next_entry(page, lru);
1760                 map = kmap_local_page(page);  !! 1174                 map = kmap_atomic(page);
1761                 tmp_count = map[offset];         1175                 tmp_count = map[offset];
1762                 kunmap_local(map);            !! 1176                 kunmap_atomic(map);
1763                                                  1177 
1764                 count += (tmp_count & ~COUNT_    1178                 count += (tmp_count & ~COUNT_CONTINUED) * n;
1765                 n *= (SWAP_CONT_MAX + 1);        1179                 n *= (SWAP_CONT_MAX + 1);
1766         } while (tmp_count & COUNT_CONTINUED)    1180         } while (tmp_count & COUNT_CONTINUED);
1767 out:                                             1181 out:
1768         unlock_cluster_or_swap_info(si, ci);  !! 1182         unlock_cluster_or_swap_info(p, ci);
1769         return count;                            1183         return count;
1770 }                                                1184 }
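
The loop above treats the continuation pages as digits of a mixed-radix number: the primary swap_map byte counts up to SWAP_MAP_MAX (0x3e), the first continuation digit has weight SWAP_MAP_MAX + 1, and each deeper digit multiplies the weight by SWAP_CONT_MAX + 1 (0x80). A standalone check of that arithmetic:

	#include <assert.h>

	#define SWAP_MAP_MAX	0x3e
	#define SWAP_CONT_MAX	0x7f

	int main(void)
	{
		/* primary byte saturated, continuation digits 5 and 2 */
		int count = SWAP_MAP_MAX;
		int n = SWAP_MAP_MAX + 1;	/* weight of the first digit */

		count += 5 * n;			/* first continuation page */
		n *= SWAP_CONT_MAX + 1;		/* deeper digits: base 0x80 */
		count += 2 * n;			/* second continuation page */

		assert(count == 62 + 5 * 63 + 2 * 63 * 128);	/* 16505 */
		return 0;
	}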
1771                                                  1185 
1772 static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,   !! 1186 /*
1773                                          swp_entry_t entry, int order)   !! 1187  * We can write to an anon page without COW if there are no other references
1774 {                                             !! 1188  * to it.  And as a side-effect, free up its swap: because the old content
1775         struct swap_cluster_info *ci;         !! 1189  * on disk will never be read, and seeking back there to write new content
1776         unsigned char *map = si->swap_map;    !! 1190  * later would only waste time away from clustering.
1777         unsigned int nr_pages = 1 << order;   !! 1191  *
1778         unsigned long roffset = swp_offset(entry);   << 
1779         unsigned long offset = round_down(roffset, nr_pages);   << 
1780         int i;                                !! 1194  * (see the other implementation for CONFIG_SWAP=n).
1781         bool ret = false;                     !! 1195  */
1782                                               !! 1196 bool reuse_swap_page(struct page *page, int *total_mapcount)
1783         ci = lock_cluster_or_swap_info(si, offset);   << 
1784         if (!ci || nr_pages == 1) {           << 
1785                 if (swap_count(map[roffset])) << 
1786                         ret = true;           << 
1787                 goto unlock_out;              << 
1788         }                                     << 
1789         for (i = 0; i < nr_pages; i++) {      << 
1790                 if (swap_count(map[offset + i])) {   << 
1791                         ret = true;           << 
1792                         break;                << 
1793                 }                             << 
1794         }                                     << 
1795 unlock_out:                                   << 
1796         unlock_cluster_or_swap_info(si, ci);  << 
1797         return ret;                           << 
1798 }                                             << 
1799                                               << 
1800 static bool folio_swapped(struct folio *folio)   << 
1801 {                                                1197 {
1802         swp_entry_t entry = folio->swap;      !! 1198         int count;
1803         struct swap_info_struct *si = _swap_info_get(entry);   << 
1804                                                  1199 
1805         if (!si)                              !! 1200         VM_BUG_ON_PAGE(!PageLocked(page), page);
                                                   >> 1201         if (unlikely(PageKsm(page)))
1806                 return false;                    1202                 return false;
                                                   >> 1203         count = page_trans_huge_mapcount(page, total_mapcount);
                                                   >> 1204         if (count <= 1 && PageSwapCache(page)) {
                                                   >> 1205                 count += page_swapcount(page);
                                                   >> 1206                 if (count != 1)
                                                   >> 1207                         goto out;
                                                   >> 1208                 if (!PageWriteback(page)) {
                                                   >> 1209                         delete_from_swap_cache(page);
                                                   >> 1210                         SetPageDirty(page);
                                                   >> 1211                 } else {
                                                   >> 1212                         swp_entry_t entry;
                                                   >> 1213                         struct swap_info_struct *p;
1807                                                  1214 
1808         if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!folio_test_large(folio)))   !! 1215                         entry.val = page_private(page);
1809                 return swap_swapcount(si, entry) != 0;   !! 1216                         p = swap_info_get(entry);
1810                                               !! 1217                         if (p->flags & SWP_STABLE_WRITES) {
1811         return swap_page_trans_huge_swapped(si, entry, folio_order(folio));   !! 1218                                 spin_unlock(&p->lock);
                                                   >> 1219                                 return false;
                                                   >> 1220                         }
                                                   >> 1221                         spin_unlock(&p->lock);
                                                   >> 1222                 }
                                                   >> 1223         }
                                                   >> 1224 out:
                                                   >> 1225         return count <= 1;
1812 }                                                1226 }
1813                                                  1227 
1814 static bool folio_swapcache_freeable(struct folio *folio)   !! 1228 /*
                                                   >> 1229  * If swap is getting full, or if there are no more mappings of this page,
                                                   >> 1230  * then try_to_free_swap is called to free its swap space.
                                                   >> 1231  */
                                                   >> 1232 int try_to_free_swap(struct page *page)
1815 {                                                1233 {
1816         VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);   !! 1234         VM_BUG_ON_PAGE(!PageLocked(page), page);
1817                                                  1235 
1818         if (!folio_test_swapcache(folio))     !! 1236         if (!PageSwapCache(page))
1819                 return false;                 !! 1237                 return 0;
1820         if (folio_test_writeback(folio))      !! 1238         if (PageWriteback(page))
1821                 return false;                 !! 1239                 return 0;
                                                   >> 1240         if (page_swapcount(page))
                                                   >> 1241                 return 0;
1822                                                  1242 
1823         /*                                       1243         /*
1824          * Once hibernation has begun to crea    1244          * Once hibernation has begun to create its image of memory,
1825          * there's a danger that one of the calls to folio_free_swap()   !! 1245          * there's a danger that one of the calls to try_to_free_swap()
1826          * - most probably a call from __try_    1246          * - most probably a call from __try_to_reclaim_swap() while
1827          * hibernation is allocating its own     1247          * hibernation is allocating its own swap pages for the image,
1828          * but conceivably even a call from m    1248          * but conceivably even a call from memory reclaim - will free
1829          * the swap from a folio which has already been recorded in the   !! 1249          * the swap from a page which has already been recorded in the
1830          * image as a clean swapcache folio, and then reuse its swap for   !! 1250          * image as a clean swapcache page, and then reuse its swap for
1831          * another page of the image.  On waking from hibernation, the    1251          * another page of the image.  On waking from hibernation, the
1832          * original folio might be freed under memory pressure, then   !! 1252          * original page might be freed under memory pressure, then
1833          * later read back in from swap, now     1253          * later read back in from swap, now with the wrong data.
1834          *                                       1254          *
1835          * Hibernation suspends storage while    1255          * Hibernation suspends storage while it is writing the image
1836          * to disk so check that here.           1256          * to disk so check that here.
1837          */                                      1257          */
1838         if (pm_suspended_storage())              1258         if (pm_suspended_storage())
1839                 return false;                 !! 1259                 return 0;
1840                                               << 
1841         return true;                          << 
1842 }                                             << 
1843                                               << 
1844 /**                                           << 
1845  * folio_free_swap() - Free the swap space used for this folio.   << 
1846  * @folio: The folio to remove.               << 
1847  *                                            << 
1848  * If swap is getting full, or if there are no more mappings of this folio,   << 
1849  * then call folio_free_swap to free its swap space.   << 
1850  *                                            << 
1851  * Return: true if we were able to release the swap space.   << 
1852  */                                           << 
1853 bool folio_free_swap(struct folio *folio)     << 
1854 {                                             << 
1855         if (!folio_swapcache_freeable(folio)) << 
1856                 return false;                 << 
1857         if (folio_swapped(folio))             << 
1858                 return false;                 << 
1859                                                  1260 
1860         delete_from_swap_cache(folio);        !! 1261         delete_from_swap_cache(page);
1861         folio_set_dirty(folio);               !! 1262         SetPageDirty(page);
1862         return true;                          !! 1263         return 1;
1863 }                                                1264 }
1864                                                  1265 
1865 /**                                           !! 1266 /*
1866  * free_swap_and_cache_nr() - Release reference on range of swap entries and   !! 1267  * Free the swap entry like above, but also try to
1867  *                            reclaim their cache if no more references remain.   !! 1268  * free the page cache entry if it is the last user.
1868  * @entry: First entry of range.              << 
1869  * @nr: Number of entries in range.           << 
1870  *                                            << 
1871  * For each swap entry in the contiguous range, release a reference. If any swap   << 
1872  * entries become free, try to reclaim their underlying folios, if present. The   << 
1873  * offset range is defined by [entry.offset, entry.offset + nr).   << 
1874  */                                              1269  */
1875 void free_swap_and_cache_nr(swp_entry_t entry !! 1270 int free_swap_and_cache(swp_entry_t entry)
1876 {                                                1271 {
1877         const unsigned long start_offset = swp_offset(entry);   !! 1272         struct swap_info_struct *p;
1878         const unsigned long end_offset = start_offset + nr;   !! 1273         struct page *page = NULL;
1879         struct swap_info_struct *si;          !! 1274         unsigned char count;
1880         bool any_only_cache = false;          << 
1881         unsigned long offset;                 << 
1882                                                  1275 
1883         if (non_swap_entry(entry))               1276         if (non_swap_entry(entry))
1884                 return;                       !! 1277                 return 1;
1885                                               << 
1886         si = get_swap_device(entry);          << 
1887         if (!si)                              << 
1888                 return;                       << 
1889                                               << 
1890         if (WARN_ON(end_offset > si->max))    << 
1891                 goto out;                     << 
1892                                               << 
1893         /*                                    << 
1894          * First free all entries in the range.   << 
1895          */                                   << 
1896         any_only_cache = __swap_entries_free(si, entry, nr);   << 
1897                                               << 
1898         /*                                    << 
1899          * Short-circuit the below loop if none of the entries had their   << 
1900          * reference drop to zero.            << 
1901          */                                   << 
1902         if (!any_only_cache)                  << 
1903                 goto out;                     << 
1904                                                  1278 
1905         /*                                    !! 1279         p = _swap_info_get(entry);
1906          * Now go back over the range trying to reclaim the swap cache. This is !! 1280         if (p) {
1907          * more efficient for large folios because we will only try to reclaim !! 1281                 count = __swap_entry_free(p, entry, 1);
1908          * the swap once per folio in the common case. If we do !! 1282                 if (count == SWAP_HAS_CACHE) {
1909          * __swap_entry_free() and __try_to_reclaim_swap() in the same loop, the !! 1283                         page = find_get_page(swap_address_space(entry),
1910          * latter will get a reference and lock the folio for every individual !! 1284                                              swp_offset(entry));
1911          * page but will only succeed once the swap slot for every subpage is !! 1285                         if (page && !trylock_page(page)) {
1912          * zero.                              !! 1286                                 put_page(page);
1913          */                                   !! 1287                                 page = NULL;
1914         for (offset = start_offset; offset < end_offset; offset += nr) { !! 1288                         }
1915                 nr = 1;                       !! 1289                 } else if (!count)
1916                 if (READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { !! 1290                         free_swap_slot(entry);
1917                         /*                    !! 1291         }
1918                          * Folios are always naturally aligned in swap so !! 1292         if (page) {
1919                          * advance forward to the next boundary. Zero means no !! 1293                 /*
1920                          * folio was found for the swap entry, so advance by 1 !! 1294                  * Not mapped elsewhere, or swap space full? Free it!
1921                          * in this case. Negative value means folio was found !! 1295                  * Also recheck PageSwapCache now page is locked (above).
1922                          * but could not be reclaimed. Here we can still advance !! 1296                  */
1923                          * to the next boundary. !! 1297                 if (PageSwapCache(page) && !PageWriteback(page) &&
1924                          */                   !! 1298                     (!page_mapped(page) || mem_cgroup_swap_full(page)) &&
1925                         nr = __try_to_reclaim_swap(si, offset, !! 1299                     !swap_swapcount(p, entry)) {
1926                                                    TTRS_UNMAPPED | TTRS_FULL); !! 1300                         delete_from_swap_cache(page);
1927                         if (nr == 0)          !! 1301                         SetPageDirty(page);
1928                                 nr = 1;       << 
1929                         else if (nr < 0)      << 
1930                                 nr = -nr;     << 
1931                         nr = ALIGN(offset + 1, nr) - offset; << 
1932                 }                                1302                 }
                                                   >> 1303                 unlock_page(page);
                                                   >> 1304                 put_page(page);
1933         }                                        1305         }
1934                                               !! 1306         return p != NULL;
1935 out:                                          << 
1936         put_swap_device(si);                  << 
1937 }                                                1307 }
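
/*
 * [Editor's sketch, not part of swapfile.c] A user-space model of the
 * advance arithmetic in the reclaim loop above, assuming the kernel's
 * power-of-two ALIGN() and the return convention from the loop comment
 * (0 = no folio, -n = folio of n pages found but not reclaimed,
 * n = n pages reclaimed). try_reclaim() and its values are invented.
 */
#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

/* Pretend a 4-page folio sits at offsets 8..11 and cannot be reclaimed. */
static int try_reclaim(unsigned long offset)
{
	return (offset >= 8 && offset < 12) ? -4 : 0;
}

int main(void)
{
	unsigned long offset;
	int nr;

	for (offset = 6; offset < 16; offset += nr) {
		nr = try_reclaim(offset);
		if (nr == 0)
			nr = 1;		/* no folio: advance one entry */
		else if (nr < 0)
			nr = -nr;	/* folio found: still skip it whole */
		nr = ALIGN(offset + 1, nr) - offset;	/* to next nr boundary */
		printf("offset %2lu -> advance %d\n", offset, nr);
	}
	return 0;
}
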
1938                                                  1308 
1939 #ifdef CONFIG_HIBERNATION                        1309 #ifdef CONFIG_HIBERNATION
1940                                               << 
1941 swp_entry_t get_swap_page_of_type(int type)   << 
1942 {                                             << 
1943         struct swap_info_struct *si = swap_type_to_swap_info(type); << 
1944         swp_entry_t entry = {0};              << 
1945                                               << 
1946         if (!si)                              << 
1947                 goto fail;                    << 
1948                                               << 
1949         /* This is called for allocating swap entry, not cache */ << 
1950         spin_lock(&si->lock);                 << 
1951         if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry, 0)) << 
1952                 atomic_long_dec(&nr_swap_pages); << 
1953         spin_unlock(&si->lock);               << 
1954 fail:                                         << 
1955         return entry;                         << 
1956 }                                             << 
1957                                               << 
1958 /*                                               1310 /*
1959  * Find the swap type that corresponds to giv    1311  * Find the swap type that corresponds to given device (if any).
1960  *                                               1312  *
1961  * @offset - number of the PAGE_SIZE-sized bl    1313  * @offset - number of the PAGE_SIZE-sized block of the device, starting
1962  * from 0, in which the swap header is expect    1314  * from 0, in which the swap header is expected to be located.
1963  *                                               1315  *
1964  * This is needed for the suspend to disk (ak    1316  * This is needed for the suspend to disk (aka swsusp).
1965  */                                              1317  */
1966 int swap_type_of(dev_t device, sector_t offset) !! 1318 int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
1967 {                                                1319 {
                                                   >> 1320         struct block_device *bdev = NULL;
1968         int type;                                1321         int type;
1969                                                  1322 
1970         if (!device)                          !! 1323         if (device)
1971                 return -1;                    !! 1324                 bdev = bdget(device);
1972                                                  1325 
1973         spin_lock(&swap_lock);                   1326         spin_lock(&swap_lock);
1974         for (type = 0; type < nr_swapfiles; t    1327         for (type = 0; type < nr_swapfiles; type++) {
1975                 struct swap_info_struct *sis     1328                 struct swap_info_struct *sis = swap_info[type];
1976                                                  1329 
1977                 if (!(sis->flags & SWP_WRITEO    1330                 if (!(sis->flags & SWP_WRITEOK))
1978                         continue;                1331                         continue;
1979                                                  1332 
1980                 if (device == sis->bdev->bd_dev) { !! 1333                 if (!bdev) {
1981                         struct swap_extent *se = first_se(sis); !! 1334                         if (bdev_p)
                                                   >> 1335                                 *bdev_p = bdgrab(sis->bdev);
                                                   >> 1336 
                                                   >> 1337                         spin_unlock(&swap_lock);
                                                   >> 1338                         return type;
                                                   >> 1339                 }
                                                   >> 1340                 if (bdev == sis->bdev) {
                                                   >> 1341                         struct swap_extent *se = &sis->first_swap_extent;
1982                                                  1342 
1983                         if (se->start_block == offset) {    1343         if (se->start_block == offset) {
                                                   >> 1344                                 if (bdev_p)
                                                   >> 1345                                         *bdev_p = bdgrab(sis->bdev);
                                                   >> 1346 
1984                                 spin_unlock(&swap_lock);    1347                                 spin_unlock(&swap_lock);
                                                   >> 1348                                 bdput(bdev);
1985                                 return type;     1349                                 return type;
1986                         }                        1350                         }
1987                 }                                1351                 }
1988         }                                        1352         }
1989         spin_unlock(&swap_lock);                 1353         spin_unlock(&swap_lock);
1990         return -ENODEV;                       !! 1354         if (bdev)
1991 }                                             !! 1355                 bdput(bdev);
1992                                                  1356 
1993 int find_first_swap(dev_t *device)            << 
1994 {                                             << 
1995         int type;                             << 
1996                                               << 
1997         spin_lock(&swap_lock);                << 
1998         for (type = 0; type < nr_swapfiles; type++) { << 
1999                 struct swap_info_struct *sis = swap_info[type]; << 
2000                                               << 
2001                 if (!(sis->flags & SWP_WRITEOK)) << 
2002                         continue;             << 
2003                 *device = sis->bdev->bd_dev;  << 
2004                 spin_unlock(&swap_lock);      << 
2005                 return type;                  << 
2006         }                                     << 
2007         spin_unlock(&swap_lock);              << 
2008         return -ENODEV;                          1357         return -ENODEV;
2009 }                                                1358 }
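
/*
 * [Editor's sketch, not part of swapfile.c] What the 6.12 lookup above
 * boils down to once the bdget()/bdput() dance of the 4.12 side is gone:
 * match on the device number and the first extent's start block. The toy
 * table, field names and values are all invented.
 */
#include <stdio.h>

struct toy_swap_info {
	unsigned int dev;		/* stands in for sis->bdev->bd_dev */
	unsigned long start_block;	/* first extent's start_block */
	int writeok;			/* stands in for SWP_WRITEOK */
};

static const struct toy_swap_info table[] = {
	{ 0x801, 0, 1 },
	{ 0x802, 2048, 1 },
};

static int toy_swap_type_of(unsigned int dev, unsigned long offset)
{
	for (int type = 0; type < 2; type++)
		if (table[type].writeok && table[type].dev == dev &&
		    table[type].start_block == offset)
			return type;
	return -1;			/* stands in for -ENODEV */
}

int main(void)
{
	printf("%d\n", toy_swap_type_of(0x802, 2048));	/* prints 1 */
	return 0;
}
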
2010                                                  1359 
2011 /*                                               1360 /*
2012  * Get the (PAGE_SIZE) block corresponding to    1361  * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
2013  * corresponding to given index in swap_info     1362  * corresponding to given index in swap_info (swap type).
2014  */                                              1363  */
2015 sector_t swapdev_block(int type, pgoff_t offset)    1364 sector_t swapdev_block(int type, pgoff_t offset)
2016 {                                                1365 {
2017         struct swap_info_struct *si = swap_type_to_swap_info(type); !! 1366         struct block_device *bdev;
2018         struct swap_extent *se;               << 
2019                                                  1367 
2020         if (!si || !(si->flags & SWP_WRITEOK)) !! 1368         if ((unsigned int)type >= nr_swapfiles)
                                                   >> 1369                 return 0;
                                                   >> 1370         if (!(swap_info[type]->flags & SWP_WRITEOK))
2021                 return 0;                        1371                 return 0;
2022         se = offset_to_swap_extent(si, offset); !! 1372         return map_swap_entry(swp_entry(type, offset), &bdev);
2023         return se->start_block + (offset - se->start_page); << 
2024 }                                                1373 }
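
/*
 * [Editor's sketch, not part of swapfile.c] The extent arithmetic used
 * above: a swap extent maps a run of page offsets starting at start_page
 * onto disk blocks starting at start_block. The struct fields mirror
 * struct swap_extent; the direct lookup stands in for the rbtree walk
 * done by offset_to_swap_extent().
 */
#include <stdio.h>

struct toy_extent {
	unsigned long start_page;
	unsigned long nr_pages;
	unsigned long start_block;
};

static unsigned long toy_swapdev_block(const struct toy_extent *se,
				       unsigned long offset)
{
	return se->start_block + (offset - se->start_page);
}

int main(void)
{
	struct toy_extent se = { .start_page = 100, .nr_pages = 50,
				 .start_block = 9000 };
	/* page offset 120 lands 20 pages into the extent: block 9020 */
	printf("%lu\n", toy_swapdev_block(&se, 120));
	return 0;
}
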
2025                                                  1374 
2026 /*                                               1375 /*
2027  * Return either the total number of swap pag    1376  * Return either the total number of swap pages of given type, or the number
2028  * of free pages of that type (depending on @    1377  * of free pages of that type (depending on @free)
2029  *                                               1378  *
2030  * This is needed for software suspend           1379  * This is needed for software suspend
2031  */                                              1380  */
2032 unsigned int count_swap_pages(int type, int f    1381 unsigned int count_swap_pages(int type, int free)
2033 {                                                1382 {
2034         unsigned int n = 0;                      1383         unsigned int n = 0;
2035                                                  1384 
2036         spin_lock(&swap_lock);                   1385         spin_lock(&swap_lock);
2037         if ((unsigned int)type < nr_swapfiles    1386         if ((unsigned int)type < nr_swapfiles) {
2038                 struct swap_info_struct *sis     1387                 struct swap_info_struct *sis = swap_info[type];
2039                                                  1388 
2040                 spin_lock(&sis->lock);           1389                 spin_lock(&sis->lock);
2041                 if (sis->flags & SWP_WRITEOK)    1390                 if (sis->flags & SWP_WRITEOK) {
2042                         n = sis->pages;          1391                         n = sis->pages;
2043                         if (free)                1392                         if (free)
2044                                 n -= sis->inu    1393                                 n -= sis->inuse_pages;
2045                 }                                1394                 }
2046                 spin_unlock(&sis->lock);         1395                 spin_unlock(&sis->lock);
2047         }                                        1396         }
2048         spin_unlock(&swap_lock);                 1397         spin_unlock(&swap_lock);
2049         return n;                                1398         return n;
2050 }                                                1399 }
2051 #endif /* CONFIG_HIBERNATION */                  1400 #endif /* CONFIG_HIBERNATION */
2052                                                  1401 
2053 static inline int pte_same_as_swp(pte_t pte,     1402 static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte)
2054 {                                                1403 {
2055         return pte_same(pte_swp_clear_flags(pte), swp_pte); !! 1404         return pte_same(pte_swp_clear_soft_dirty(pte), swp_pte);
2056 }                                                1405 }
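
/*
 * [Editor's sketch, not part of swapfile.c] Why pte_same_as_swp() clears
 * flags before comparing: a swap PTE may carry per-PTE software bits
 * (soft-dirty, uffd-wp, exclusive) on top of the swap entry, and those
 * must not defeat the comparison. The bit values here are invented; only
 * the masking idea matches the kernel.
 */
#include <stdio.h>

#define TOY_SWP_SOFT_DIRTY	0x1UL
#define TOY_SWP_UFFD_WP		0x2UL
#define TOY_SWP_FLAGS		(TOY_SWP_SOFT_DIRTY | TOY_SWP_UFFD_WP)

static int toy_pte_same_as_swp(unsigned long pte, unsigned long swp_pte)
{
	return (pte & ~TOY_SWP_FLAGS) == swp_pte;
}

int main(void)
{
	unsigned long entry = 0x1000;

	/* same entry, but marked soft-dirty in the page table: still equal */
	printf("%d\n", toy_pte_same_as_swp(entry | TOY_SWP_SOFT_DIRTY, entry));
	return 0;
}
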
2057                                                  1406 
2058 /*                                               1407 /*
2059  * No need to decide whether this PTE shares     1408  * No need to decide whether this PTE shares the swap entry with others,
2060  * just let do_wp_page work it out if a write    1409  * just let do_wp_page work it out if a write is requested later - to
2061  * force COW, vm_page_prot omits write permis    1410  * force COW, vm_page_prot omits write permission from any private vma.
2062  */                                              1411  */
2063 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,    1412 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
2064                 unsigned long addr, swp_entry_t entry, struct folio *folio) !! 1413                 unsigned long addr, swp_entry_t entry, struct page *page)
2065 {                                                1414 {
2066         struct page *page;                    !! 1415         struct page *swapcache;
2067         struct folio *swapcache;              !! 1416         struct mem_cgroup *memcg;
2068         spinlock_t *ptl;                         1417         spinlock_t *ptl;
2069         pte_t *pte, new_pte, old_pte;         !! 1418         pte_t *pte;
2070         bool hwpoisoned = false;              << 
2071         int ret = 1;                             1419         int ret = 1;
2072                                                  1420 
2073         swapcache = folio;                    !! 1421         swapcache = page;
2074         folio = ksm_might_need_to_copy(folio, vma, addr); !! 1422         page = ksm_might_need_to_copy(page, vma, addr);
2075         if (unlikely(!folio))                 !! 1423         if (unlikely(!page))
2076                 return -ENOMEM;                  1424                 return -ENOMEM;
2077         else if (unlikely(folio == ERR_PTR(-EHWPOISON))) { << 
2078                 hwpoisoned = true;            << 
2079                 folio = swapcache;            << 
2080         }                                     << 
2081                                                  1425 
2082         page = folio_file_page(folio, swp_offset(entry)); !! 1426         if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
2083         if (PageHWPoison(page))               !! 1427                                 &memcg, false)) {
2084                 hwpoisoned = true;            !! 1428                 ret = -ENOMEM;
                                                   >> 1429                 goto out_nolock;
                                                   >> 1430         }
2085                                                  1431 
2086         pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);    1432         pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
2087         if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte), !! 1433         if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
2088                                               swp_entry_to_pte(entry)))) { !! 1434                 mem_cgroup_cancel_charge(page, memcg, false);
2089                 ret = 0;                         1435                 ret = 0;
2090                 goto out;                        1436                 goto out;
2091         }                                        1437         }
2092                                                  1438 
2093         old_pte = ptep_get(pte);              << 
2094                                               << 
2095         if (unlikely(hwpoisoned || !folio_test_uptodate(folio))) { << 
2096                 swp_entry_t swp_entry;        << 
2097                                               << 
2098                 dec_mm_counter(vma->vm_mm, MM_SWAPENTS); << 
2099                 if (hwpoisoned) {             << 
2100                         swp_entry = make_hwpoison_entry(page); << 
2101                 } else {                      << 
2102                         swp_entry = make_poisoned_swp_entry(); << 
2103                 }                             << 
2104                 new_pte = swp_entry_to_pte(swp_entry); << 
2105                 ret = 0;                      << 
2106                 goto setpte;                  << 
2107         }                                     << 
2108                                               << 
2109         /*                                    << 
2110          * Some architectures may have to restore extra metadata to the page << 
2111          * when reading from swap. This metadata may be indexed by swap entry << 
2112          * so this must be called before swap_free(). << 
2113          */                                   << 
2114         arch_swap_restore(folio_swap(entry, folio), folio); << 
2115                                               << 
2116         dec_mm_counter(vma->vm_mm, MM_SWAPENTS);    1439         dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
2117         inc_mm_counter(vma->vm_mm, MM_ANONPAGES);    1440         inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
2118         folio_get(folio);                     !! 1441         get_page(page);
2119         if (folio == swapcache) {             !! 1442         set_pte_at(vma->vm_mm, addr, pte,
2120                 rmap_t rmap_flags = RMAP_NONE; !! 1443                    pte_mkold(mk_pte(page, vma->vm_page_prot)));
2121                                               !! 1444         if (page == swapcache) {
2122                 /*                            !! 1445                 page_add_anon_rmap(page, vma, addr, false);
2123                  * See do_swap_page(): writeback would be problematic. !! 1446                 mem_cgroup_commit_charge(page, memcg, true, false);
2124                  * However, we do a folio_wait_writeback() just before this << 
2125                  * call and have the folio locked. << 
2126                  */                           << 
2127                 VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); << 
2128                 if (pte_swp_exclusive(old_pte)) << 
2129                         rmap_flags |= RMAP_EXCLUSIVE; << 
2130                 /*                            << 
2131                  * We currently only expect small !PageAnon folios, which are either << 
2132                  * fully exclusive or fully shared. If we ever get large folios << 
2133                  * here, we have to be careful. << 
2134                  */                           << 
2135                 if (!folio_test_anon(folio)) { << 
2136                         VM_WARN_ON_ONCE(folio_test_large(folio)); << 
2137                         VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio); << 
2138                         folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE); << 
2139                 } else {                      << 
2140                         folio_add_anon_rmap_pte(folio, page, vma, addr, rmap_flags); << 
2141                 }                             << 
2142         } else { /* ksm created a completely     1447         } else { /* ksm created a completely new copy */
2143                 folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE); !! 1448                 page_add_new_anon_rmap(page, vma, addr, false);
2144                 folio_add_lru_vma(folio, vma); !! 1449                 mem_cgroup_commit_charge(page, memcg, false, false);
                                                   >> 1450                 lru_cache_add_active_or_unevictable(page, vma);
2145         }                                        1451         }
2146         new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot)); << 
2147         if (pte_swp_soft_dirty(old_pte))      << 
2148                 new_pte = pte_mksoft_dirty(new_pte); << 
2149         if (pte_swp_uffd_wp(old_pte))         << 
2150                 new_pte = pte_mkuffd_wp(new_pte); << 
2151 setpte:                                       << 
2152         set_pte_at(vma->vm_mm, addr, pte, new_pte); << 
2153         swap_free(entry);                        1452         swap_free(entry);
                                                   >> 1453         /*
                                                   >> 1454          * Move the page to the active list so it is not
                                                   >> 1455          * immediately swapped out again after swapon.
                                                   >> 1456          */
                                                   >> 1457         activate_page(page);
2154 out:                                             1458 out:
2155         if (pte)                              !! 1459         pte_unmap_unlock(pte, ptl);
2156                 pte_unmap_unlock(pte, ptl);   !! 1460 out_nolock:
2157         if (folio != swapcache) {             !! 1461         if (page != swapcache) {
2158                 folio_unlock(folio);          !! 1462                 unlock_page(page);
2159                 folio_put(folio);             !! 1463                 put_page(page);
2160         }                                        1464         }
2161         return ret;                              1465         return ret;
2162 }                                                1466 }
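
/*
 * [Editor's sketch, not part of swapfile.c] The outcome matrix of
 * unuse_pte() above as a toy function: one string per combination of
 * "PTE still matches the entry" and "page is poisoned / not uptodate".
 * Purely illustrative; the names are invented.
 */
#include <stdio.h>

static const char *toy_unuse_pte(int pte_still_matches, int bad_page)
{
	if (!pte_still_matches)
		return "ret 0: someone else already faulted it in";
	if (bad_page)
		return "ret 0: install poison marker PTE, drop swap count";
	return "ret 1: map the folio, dec MM_SWAPENTS, swap_free(entry)";
}

int main(void)
{
	printf("%s\n", toy_unuse_pte(1, 0));
	return 0;
}
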
2163                                                  1467 
2164 static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,    1468 static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
2165                         unsigned long addr, unsigned long end, !! 1469                                 unsigned long addr, unsigned long end,
2166                         unsigned int type)    !! 1470                                 swp_entry_t entry, struct page *page)
2167 {                                                1471 {
2168         pte_t *pte = NULL;                    !! 1472         pte_t swp_pte = swp_entry_to_pte(entry);
2169         struct swap_info_struct *si;          !! 1473         pte_t *pte;
                                                   >> 1474         int ret = 0;
2170                                                  1475 
2171         si = swap_info[type];                 !! 1476         /*
                                                   >> 1477          * We don't actually need pte lock while scanning for swp_pte: since
                                                   >> 1478          * we hold page lock and mmap_sem, swp_pte cannot be inserted into the
                                                   >> 1479          * page table while we're scanning; though it could get zapped, and on
                                                   >> 1480          * some architectures (e.g. x86_32 with PAE) we might catch a glimpse
                                                   >> 1481          * of unmatched parts which look like swp_pte, so unuse_pte must
                                                   >> 1482          * recheck under pte lock.  Scanning without pte lock lets it be
                                                   >> 1483          * preemptable whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE.
                                                   >> 1484          */
                                                   >> 1485         pte = pte_offset_map(pmd, addr);
2172         do {                                     1486         do {
2173                 struct folio *folio;          !! 1487                 /*
2174                 unsigned long offset;         !! 1488                  * swapoff spends a _lot_ of time in this loop!
2175                 unsigned char swp_count;      !! 1489                  * Test inline before going to call unuse_pte.
2176                 swp_entry_t entry;            !! 1490                  */
2177                 int ret;                      !! 1491                 if (unlikely(pte_same_as_swp(*pte, swp_pte))) {
2178                 pte_t ptent;                  !! 1492                         pte_unmap(pte);
2179                                               !! 1493                         ret = unuse_pte(vma, pmd, addr, entry, page);
2180                 if (!pte++) {                 !! 1494                         if (ret)
                                                   >> 1495                                 goto out;
2181                         pte = pte_offset_map(    1496                         pte = pte_offset_map(pmd, addr);
2182                         if (!pte)             << 
2183                                 break;        << 
2184                 }                             << 
2185                                               << 
2186                 ptent = ptep_get_lockless(pte); << 
2187                                               << 
2188                 if (!is_swap_pte(ptent))      << 
2189                         continue;             << 
2190                                               << 
2191                 entry = pte_to_swp_entry(ptent); << 
2192                 if (swp_type(entry) != type)  << 
2193                         continue;             << 
2194                                               << 
2195                 offset = swp_offset(entry);   << 
2196                 pte_unmap(pte);               << 
2197                 pte = NULL;                   << 
2198                                               << 
2199                 folio = swap_cache_get_folio(entry, vma, addr); << 
2200                 if (!folio) {                 << 
2201                         struct vm_fault vmf = << 
2202                                 .vma = vma,   << 
2203                                 .address = addr, << 
2204                                 .real_address = addr, << 
2205                                 .pmd = pmd,   << 
2206                         };                    << 
2207                                               << 
2208                         folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, << 
2209                                                  &vmf); << 
2210                 }                             << 
2211                 if (!folio) {                 << 
2212                         swp_count = READ_ONCE(si->swap_map[offset]); << 
2213                         if (swp_count == 0 || swp_count == SWAP_MAP_BAD) << 
2214                                 continue;     << 
2215                         return -ENOMEM;       << 
2216                 }                             << 
2217                                               << 
2218                 folio_lock(folio);            << 
2219                 folio_wait_writeback(folio);  << 
2220                 ret = unuse_pte(vma, pmd, addr, entry, folio); << 
2221                 if (ret < 0) {                << 
2222                         folio_unlock(folio);  << 
2223                         folio_put(folio);     << 
2224                         return ret;           << 
2225                 }                                1497                 }
2226                                               !! 1498         } while (pte++, addr += PAGE_SIZE, addr != end);
2227                 folio_free_swap(folio);       !! 1499         pte_unmap(pte - 1);
2228                 folio_unlock(folio);          !! 1500 out:
2229                 folio_put(folio);             !! 1501         return ret;
2230         } while (addr += PAGE_SIZE, addr != end); << 
2231                                               << 
2232         if (pte)                              << 
2233                 pte_unmap(pte);               << 
2234         return 0;                             << 
2235 }                                                1502 }
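
/*
 * [Editor's sketch, not part of swapfile.c] The pattern used above: scan
 * entries with only a lockless snapshot (ptep_get_lockless), and let
 * unuse_pte() recheck under the PTE lock before committing. Modeled here
 * with a mutex-protected slot; everything is invented except the shape.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;
static volatile long slot = 42;		/* pretend swap PTE value */

static int commit_if_still(long expected, long newval)
{
	int ret = 0;

	pthread_mutex_lock(&ptl);	/* the "pte_offset_map_lock" step */
	if (slot == expected) {		/* the "pte_same_as_swp" recheck */
		slot = newval;
		ret = 1;
	}
	pthread_mutex_unlock(&ptl);
	return ret;
}

int main(void)
{
	long snap = slot;		/* lockless snapshot */

	printf("committed: %d\n", commit_if_still(snap, 0));
	return 0;
}
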
2236                                                  1503 
2237 static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,    1504 static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
2238                                 unsigned long    1505                                 unsigned long addr, unsigned long end,
2239                                 unsigned int type) !! 1506                                 swp_entry_t entry, struct page *page)
2240 {                                                1507 {
2241         pmd_t *pmd;                              1508         pmd_t *pmd;
2242         unsigned long next;                      1509         unsigned long next;
2243         int ret;                                 1510         int ret;
2244                                                  1511 
2245         pmd = pmd_offset(pud, addr);             1512         pmd = pmd_offset(pud, addr);
2246         do {                                     1513         do {
2247                 cond_resched();                  1514                 cond_resched();
2248                 next = pmd_addr_end(addr, end    1515                 next = pmd_addr_end(addr, end);
2249                 ret = unuse_pte_range(vma, pmd, addr, next, type); !! 1516                 if (pmd_none_or_trans_huge_or_clear_bad(pmd))
                                                   >> 1517                         continue;
                                                   >> 1518                 ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
2250                 if (ret)                         1519                 if (ret)
2251                         return ret;              1520                         return ret;
2252         } while (pmd++, addr = next, addr !=     1521         } while (pmd++, addr = next, addr != end);
2253         return 0;                                1522         return 0;
2254 }                                                1523 }
2255                                                  1524 
2256 static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,    1525 static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
2257                                 unsigned long    1526                                 unsigned long addr, unsigned long end,
2258                                 unsigned int type) !! 1527                                 swp_entry_t entry, struct page *page)
2259 {                                                1528 {
2260         pud_t *pud;                              1529         pud_t *pud;
2261         unsigned long next;                      1530         unsigned long next;
2262         int ret;                                 1531         int ret;
2263                                                  1532 
2264         pud = pud_offset(p4d, addr);             1533         pud = pud_offset(p4d, addr);
2265         do {                                     1534         do {
2266                 next = pud_addr_end(addr, end    1535                 next = pud_addr_end(addr, end);
2267                 if (pud_none_or_clear_bad(pud    1536                 if (pud_none_or_clear_bad(pud))
2268                         continue;                1537                         continue;
2269                 ret = unuse_pmd_range(vma, pud, addr, next, type); !! 1538                 ret = unuse_pmd_range(vma, pud, addr, next, entry, page);
2270                 if (ret)                         1539                 if (ret)
2271                         return ret;              1540                         return ret;
2272         } while (pud++, addr = next, addr !=     1541         } while (pud++, addr = next, addr != end);
2273         return 0;                                1542         return 0;
2274 }                                                1543 }
2275                                                  1544 
2276 static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,    1545 static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
2277                                 unsigned long    1546                                 unsigned long addr, unsigned long end,
2278                                 unsigned int type) !! 1547                                 swp_entry_t entry, struct page *page)
2279 {                                                1548 {
2280         p4d_t *p4d;                              1549         p4d_t *p4d;
2281         unsigned long next;                      1550         unsigned long next;
2282         int ret;                                 1551         int ret;
2283                                                  1552 
2284         p4d = p4d_offset(pgd, addr);             1553         p4d = p4d_offset(pgd, addr);
2285         do {                                     1554         do {
2286                 next = p4d_addr_end(addr, end    1555                 next = p4d_addr_end(addr, end);
2287                 if (p4d_none_or_clear_bad(p4d    1556                 if (p4d_none_or_clear_bad(p4d))
2288                         continue;                1557                         continue;
2289                 ret = unuse_pud_range(vma, p4d, addr, next, type); !! 1558                 ret = unuse_pud_range(vma, p4d, addr, next, entry, page);
2290                 if (ret)                         1559                 if (ret)
2291                         return ret;              1560                         return ret;
2292         } while (p4d++, addr = next, addr !=     1561         } while (p4d++, addr = next, addr != end);
2293         return 0;                                1562         return 0;
2294 }                                                1563 }
2295                                                  1564 
2296 static int unuse_vma(struct vm_area_struct *vma, unsigned int type) !! 1565 static int unuse_vma(struct vm_area_struct *vma,
                                                   >> 1566                                 swp_entry_t entry, struct page *page)
2297 {                                                1567 {
2298         pgd_t *pgd;                              1568         pgd_t *pgd;
2299         unsigned long addr, end, next;           1569         unsigned long addr, end, next;
2300         int ret;                                 1570         int ret;
2301                                                  1571 
2302         addr = vma->vm_start;                 !! 1572         if (page_anon_vma(page)) {
2303         end = vma->vm_end;                    !! 1573                 addr = page_address_in_vma(page, vma);
                                                   >> 1574                 if (addr == -EFAULT)
                                                   >> 1575                         return 0;
                                                   >> 1576                 else
                                                   >> 1577                         end = addr + PAGE_SIZE;
                                                   >> 1578         } else {
                                                   >> 1579                 addr = vma->vm_start;
                                                   >> 1580                 end = vma->vm_end;
                                                   >> 1581         }
2304                                                  1582 
2305         pgd = pgd_offset(vma->vm_mm, addr);      1583         pgd = pgd_offset(vma->vm_mm, addr);
2306         do {                                     1584         do {
2307                 next = pgd_addr_end(addr, end    1585                 next = pgd_addr_end(addr, end);
2308                 if (pgd_none_or_clear_bad(pgd    1586                 if (pgd_none_or_clear_bad(pgd))
2309                         continue;                1587                         continue;
2310                 ret = unuse_p4d_range(vma, pgd, addr, next, type); !! 1588                 ret = unuse_p4d_range(vma, pgd, addr, next, entry, page);
2311                 if (ret)                         1589                 if (ret)
2312                         return ret;              1590                         return ret;
2313         } while (pgd++, addr = next, addr !=     1591         } while (pgd++, addr = next, addr != end);
2314         return 0;                                1592         return 0;
2315 }                                                1593 }
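
/*
 * [Editor's sketch, not part of swapfile.c] The walk pattern shared by
 * unuse_{p4d,pud,pmd}_range() above: each level advances in its own block
 * size via *_addr_end(), which caps the step at `end`. Modeled with one
 * invented level of 1 MiB blocks.
 */
#include <stdio.h>

#define TOY_BLOCK (1UL << 20)

static unsigned long toy_addr_end(unsigned long addr, unsigned long end)
{
	unsigned long next = (addr + TOY_BLOCK) & ~(TOY_BLOCK - 1);

	return next < end ? next : end;
}

int main(void)
{
	unsigned long addr = 0x123456, end = 0x350000, next;

	do {
		next = toy_addr_end(addr, end);
		printf("walk [%#lx, %#lx)\n", addr, next);
	} while (addr = next, addr != end);
	return 0;
}
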
2316                                                  1594 
2317 static int unuse_mm(struct mm_struct *mm, unsigned int type) !! 1595 static int unuse_mm(struct mm_struct *mm,
                                                   >> 1596                                 swp_entry_t entry, struct page *page)
2318 {                                                1597 {
2319         struct vm_area_struct *vma;              1598         struct vm_area_struct *vma;
2320         int ret = 0;                             1599         int ret = 0;
2321         VMA_ITERATOR(vmi, mm, 0);             << 
2322                                               << 
2323         mmap_read_lock(mm);                   << 
2324         for_each_vma(vmi, vma) {              << 
2325                 if (vma->anon_vma && !is_vm_hugetlb_page(vma)) { << 
2326                         ret = unuse_vma(vma, type); << 
2327                         if (ret)              << 
2328                                 break;        << 
2329                 }                             << 
2330                                                  1600 
                                                   >> 1601         if (!down_read_trylock(&mm->mmap_sem)) {
                                                   >> 1602                 /*
                                                   >> 1603                  * Activate page so shrink_inactive_list is unlikely to unmap
                                                   >> 1604                  * its ptes while lock is dropped, so swapoff can make progress.
                                                   >> 1605                  */
                                                   >> 1606                 activate_page(page);
                                                   >> 1607                 unlock_page(page);
                                                   >> 1608                 down_read(&mm->mmap_sem);
                                                   >> 1609                 lock_page(page);
                                                   >> 1610         }
                                                   >> 1611         for (vma = mm->mmap; vma; vma = vma->vm_next) {
                                                   >> 1612                 if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
                                                   >> 1613                         break;
2331                 cond_resched();                  1614                 cond_resched();
2332         }                                        1615         }
2333         mmap_read_unlock(mm);                 !! 1616         up_read(&mm->mmap_sem);
2334         return ret;                           !! 1617         return (ret < 0)? ret: 0;
2335 }                                                1618 }
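
/*
 * [Editor's sketch, not part of swapfile.c] The shape of unuse_mm()
 * above: hold the address space's read lock across the whole VMA walk,
 * descend only into VMAs that can hold anon swap PTEs, and stop at the
 * first error. The rwlock stands in for mmap_read_lock(); everything
 * else is invented.
 */
#include <pthread.h>
#include <stdio.h>

struct toy_vma { int anon; int hugetlb; struct toy_vma *next; };
struct toy_mm {
	pthread_rwlock_t lock;
	struct toy_vma *vmas;
};

static int toy_unuse_vma(struct toy_vma *v) { (void)v; return 0; }

static int toy_unuse_mm(struct toy_mm *mm)
{
	int ret = 0;

	pthread_rwlock_rdlock(&mm->lock);	/* mmap_read_lock() */
	for (struct toy_vma *v = mm->vmas; v && !ret; v = v->next)
		if (v->anon && !v->hugetlb)	/* skip unswappable VMAs */
			ret = toy_unuse_vma(v);
	pthread_rwlock_unlock(&mm->lock);	/* mmap_read_unlock() */
	return ret;
}

int main(void)
{
	struct toy_vma b = { 1, 0, NULL }, a = { 0, 0, &b };
	struct toy_mm mm = { PTHREAD_RWLOCK_INITIALIZER, &a };

	printf("ret=%d\n", toy_unuse_mm(&mm));
	return 0;
}
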
2336                                                  1619 
2337 /*                                               1620 /*
2338  * Scan swap_map from current position to next entry still in use. !! 1621  * Scan swap_map (or frontswap_map if frontswap parameter is true)
2339  * Return 0 if there are no inuse entries after prev till end of !! 1622  * from current position to next entry still in use.
2340  * the map.                                   !! 1623  * Recycle to start on reaching the end, returning 0 when empty.
2341  */                                              1624  */
2342 static unsigned int find_next_to_unuse(struct swap_info_struct *si,    1625 static unsigned int find_next_to_unuse(struct swap_info_struct *si,
2343                                         unsigned int prev) !! 1626                                         unsigned int prev, bool frontswap)
2344 {                                                1627 {
2345         unsigned int i;                       !! 1628         unsigned int max = si->max;
                                                   >> 1629         unsigned int i = prev;
2346         unsigned char count;                     1630         unsigned char count;
2347                                                  1631 
2348         /*                                       1632         /*
2349          * No need for swap_lock here: we're     1633          * No need for swap_lock here: we're just looking
2350          * for whether an entry is in use, no    1634          * for whether an entry is in use, not modifying it; false
2351          * hits are okay, and sys_swapoff() h    1635          * hits are okay, and sys_swapoff() has already prevented new
2352          * allocations from this area (while     1636          * allocations from this area (while holding swap_lock).
2353          */                                      1637          */
2354         for (i = prev + 1; i < si->max; i++) { !! 1638         for (;;) {
                                                   >> 1639                 if (++i >= max) {
                                                   >> 1640                         if (!prev) {
                                                   >> 1641                                 i = 0;
                                                   >> 1642                                 break;
                                                   >> 1643                         }
                                                   >> 1644                         /*
                                                   >> 1645                          * No entries in use at top of swap_map,
                                                   >> 1646                          * loop back to start and recheck there.
                                                   >> 1647                          */
                                                   >> 1648                         max = prev + 1;
                                                   >> 1649                         prev = 0;
                                                   >> 1650                         i = 1;
                                                   >> 1651                 }
2355                 count = READ_ONCE(si->swap_map[i]);    1652                 count = READ_ONCE(si->swap_map[i]);
2356                 if (count && swap_count(count    1653                 if (count && swap_count(count) != SWAP_MAP_BAD)
2357                         break;                !! 1654                         if (!frontswap || frontswap_test(si, i))
                                                   >> 1655                                 break;
2358                 if ((i % LATENCY_LIMIT) == 0)    1656                 if ((i % LATENCY_LIMIT) == 0)
2359                         cond_resched();          1657                         cond_resched();
2360         }                                        1658         }
2361                                               << 
2362         if (i == si->max)                     << 
2363                 i = 0;                        << 
2364                                               << 
2365         return i;                                1659         return i;
2366 }                                                1660 }
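
/*
 * [Editor's sketch, not part of swapfile.c] find_next_to_unuse() above
 * reduced to its core: scan the use-count array from prev+1, skip free
 * (0) and bad entries, return 0 once the end is reached. SWAP_MAP_BAD's
 * value here is invented.
 */
#include <stdio.h>

#define TOY_MAP_BAD 0x3f

static unsigned int toy_find_next(const unsigned char *map,
				  unsigned int max, unsigned int prev)
{
	unsigned int i;

	for (i = prev + 1; i < max; i++)
		if (map[i] && map[i] != TOY_MAP_BAD)
			break;

	return i == max ? 0 : i;
}

int main(void)
{
	unsigned char map[8] = { 1, 0, TOY_MAP_BAD, 2, 0, 0, 1, 0 };

	for (unsigned int i = 0; (i = toy_find_next(map, 8, i)) != 0; )
		printf("in use at %u\n", i);	/* prints 3 then 6 */
	return 0;
}
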
2367                                                  1661 
2368 static int try_to_unuse(unsigned int type)    !! 1662 /*
                                                   >> 1663  * We completely avoid races by reading each swap page in advance,
                                                   >> 1664  * and then search for the process using it.  All the necessary
                                                   >> 1665  * page table adjustments can then be made atomically.
                                                   >> 1666  *
                                                   >> 1667  * if the boolean frontswap is true, only unuse pages_to_unuse pages;
                                                   >> 1668  * pages_to_unuse==0 means all pages; ignored if frontswap is false
                                                   >> 1669  */
                                                   >> 1670 int try_to_unuse(unsigned int type, bool frontswap,
                                                   >> 1671                  unsigned long pages_to_unuse)
2369 {                                                1672 {
2370         struct mm_struct *prev_mm;            << 
2371         struct mm_struct *mm;                 << 
2372         struct list_head *p;                  << 
2373         int retval = 0;                       << 
2374         struct swap_info_struct *si = swap_in    1673         struct swap_info_struct *si = swap_info[type];
2375         struct folio *folio;                  !! 1674         struct mm_struct *start_mm;
                                                   >> 1675         volatile unsigned char *swap_map; /* swap_map is accessed without
                                                   >> 1676                                            * locking. Mark it as volatile
                                                   >> 1677                                            * to prevent compiler doing
                                                   >> 1678                                            * something odd.
                                                   >> 1679                                            */
                                                   >> 1680         unsigned char swcount;
                                                   >> 1681         struct page *page;
2376         swp_entry_t entry;                       1682         swp_entry_t entry;
2377         unsigned int i;                       !! 1683         unsigned int i = 0;
                                                   >> 1684         int retval = 0;
2378                                                  1685 
2379         if (!READ_ONCE(si->inuse_pages))      !! 1686         /*
2380                 goto success;                 !! 1687          * When searching mms for an entry, a good strategy is to
                                                   >> 1688          * start at the first mm we freed the previous entry from
                                                   >> 1689          * (though actually we don't notice whether we or coincidence
                                                   >> 1690          * freed the entry).  Initialize this start_mm with a hold.
                                                   >> 1691          *
                                                   >> 1692          * A simpler strategy would be to start at the last mm we
                                                   >> 1693          * freed the previous entry from; but that would take less
                                                   >> 1694          * advantage of mmlist ordering, which clusters forked mms
                                                   >> 1695          * together, child after parent.  If we race with dup_mmap(), we
                                                   >> 1696          * prefer to resolve parent before child, lest we miss entries
                                                   >> 1697          * duplicated after we scanned child: using last mm would invert
                                                   >> 1698          * that.
                                                   >> 1699          */
                                                   >> 1700         start_mm = &init_mm;
                                                   >> 1701         mmget(&init_mm);
                                                   >> 1702 
                                                   >> 1703         /*
                                                   >> 1704          * Keep on scanning until all entries have gone.  Usually,
                                                   >> 1705          * one pass through swap_map is enough, but not necessarily:
                                                   >> 1706          * there are races when an instance of an entry might be missed.
                                                   >> 1707          */
                                                   >> 1708         while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
                                                   >> 1709                 if (signal_pending(current)) {
                                                   >> 1710                         retval = -EINTR;
                                                   >> 1711                         break;
                                                   >> 1712                 }
2381                                                  1713 
2382 retry:                                        !! 1714                 /*
2383         retval = shmem_unuse(type);           !! 1715                  * Get a page for the entry, using the existing swap
2384         if (retval)                           !! 1716                  * cache page if there is one.  Otherwise, get a clean
2385                 return retval;                !! 1717                  * page and read the swap into it.
                                                   >> 1718                  */
                                                   >> 1719                 swap_map = &si->swap_map[i];
                                                   >> 1720                 entry = swp_entry(type, i);
                                                   >> 1721                 page = read_swap_cache_async(entry,
                                                   >> 1722                                         GFP_HIGHUSER_MOVABLE, NULL, 0);
                                                   >> 1723                 if (!page) {
                                                   >> 1724                         /*
                                                   >> 1725                          * Either swap_duplicate() failed because entry
                                                   >> 1726                          * has been freed independently, and will not be
                                                   >> 1727                          * reused since sys_swapoff() already disabled
                                                   >> 1728                          * allocation from here, or alloc_page() failed.
                                                   >> 1729                          */
                                                   >> 1730                         swcount = *swap_map;
                                                   >> 1731                         /*
                                                   >> 1732                          * We don't hold lock here, so the swap entry could be
                                                   >> 1733                          * SWAP_MAP_BAD (when the cluster is discarding).
                                                   >> 1734                          * Instead of fail out, We can just skip the swap
                                                   >> 1735                          * entry because swapoff will wait for discarding
                                                   >> 1736                          * finish anyway.
                                                   >> 1737                          */
                                                   >> 1738                         if (!swcount || swcount == SWAP_MAP_BAD)
                                                   >> 1739                                 continue;
                                                   >> 1740                         retval = -ENOMEM;
                                                   >> 1741                         break;
                                                   >> 1742                 }
2386                                                  1743 
2387         prev_mm = &init_mm;                   !! 1744                 /*
2388         mmget(prev_mm);                       !! 1745                  * Don't hold on to start_mm if it looks like exiting.
                                                   >> 1746                  */
                                                   >> 1747                 if (atomic_read(&start_mm->mm_users) == 1) {
                                                   >> 1748                         mmput(start_mm);
                                                   >> 1749                         start_mm = &init_mm;
                                                   >> 1750                         mmget(&init_mm);
                                                   >> 1751                 }
2389                                                  1752 
2390         spin_lock(&mmlist_lock);              !! 1753                 /*
2391         p = &init_mm.mmlist;                  !! 1754                  * Wait for and lock page.  When do_swap_page races with
2392         while (READ_ONCE(si->inuse_pages) &&  !! 1755                  * try_to_unuse, do_swap_page can handle the fault much
2393                !signal_pending(current) &&    !! 1756                  * faster than try_to_unuse can locate the entry.  This
2394                (p = p->next) != &init_mm.mmlist) { !! 1758          * defer to do_swap_page in such a case - in some tests,
                                                   >> 1758                  * defer to do_swap_page in such a case - in some tests,
                                                   >> 1759                  * do_swap_page and try_to_unuse repeatedly compete.
                                                   >> 1760                  */
                                                   >> 1761                 wait_on_page_locked(page);
                                                   >> 1762                 wait_on_page_writeback(page);
                                                   >> 1763                 lock_page(page);
                                                   >> 1764                 wait_on_page_writeback(page);
2395                                                  1765 
2396                 mm = list_entry(p, struct mm_struct, mmlist); !! 1766                 /*
2397                 if (!mmget_not_zero(mm))      !! 1767                  * Remove all references to entry.
                                                   >> 1768                  */
                                                   >> 1769                 swcount = *swap_map;
                                                   >> 1770                 if (swap_count(swcount) == SWAP_MAP_SHMEM) {
                                                   >> 1771                         retval = shmem_unuse(entry, page);
                                                   >> 1772                         /* page has already been unlocked and released */
                                                   >> 1773                         if (retval < 0)
                                                   >> 1774                                 break;
2398                         continue;                1775                         continue;
2399                 spin_unlock(&mmlist_lock);    !! 1776                 }
2400                 mmput(prev_mm);               !! 1777                 if (swap_count(swcount) && start_mm != &init_mm)
2401                 prev_mm = mm;                 !! 1778                         retval = unuse_mm(start_mm, entry, page);
2402                 retval = unuse_mm(mm, type);  !! 1779 
2403                 if (retval) {                 !! 1780                 if (swap_count(*swap_map)) {
                                                   >> 1781                         int set_start_mm = (*swap_map >= swcount);
                                                   >> 1782                         struct list_head *p = &start_mm->mmlist;
                                                   >> 1783                         struct mm_struct *new_start_mm = start_mm;
                                                   >> 1784                         struct mm_struct *prev_mm = start_mm;
                                                   >> 1785                         struct mm_struct *mm;
                                                   >> 1786 
                                                   >> 1787                         mmget(new_start_mm);
                                                   >> 1788                         mmget(prev_mm);
                                                   >> 1789                         spin_lock(&mmlist_lock);
                                                   >> 1790                         while (swap_count(*swap_map) && !retval &&
                                                   >> 1791                                         (p = p->next) != &start_mm->mmlist) {
                                                   >> 1792                                 mm = list_entry(p, struct mm_struct, mmlist);
                                                   >> 1793                                 if (!mmget_not_zero(mm))
                                                   >> 1794                                         continue;
                                                   >> 1795                                 spin_unlock(&mmlist_lock);
                                                   >> 1796                                 mmput(prev_mm);
                                                   >> 1797                                 prev_mm = mm;
                                                   >> 1798 
                                                   >> 1799                                 cond_resched();
                                                   >> 1800 
                                                   >> 1801                                 swcount = *swap_map;
                                                   >> 1802                                 if (!swap_count(swcount)) /* any usage ? */
                                                   >> 1803                                         ;
                                                   >> 1804                                 else if (mm == &init_mm)
                                                   >> 1805                                         set_start_mm = 1;
                                                   >> 1806                                 else
                                                   >> 1807                                         retval = unuse_mm(mm, entry, page);
                                                   >> 1808 
                                                   >> 1809                                 if (set_start_mm && *swap_map < swcount) {
                                                   >> 1810                                         mmput(new_start_mm);
                                                   >> 1811                                         mmget(mm);
                                                   >> 1812                                         new_start_mm = mm;
                                                   >> 1813                                         set_start_mm = 0;
                                                   >> 1814                                 }
                                                   >> 1815                                 spin_lock(&mmlist_lock);
                                                   >> 1816                         }
                                                   >> 1817                         spin_unlock(&mmlist_lock);
2404                         mmput(prev_mm);          1818                         mmput(prev_mm);
2405                         return retval;        !! 1819                         mmput(start_mm);
                                                   >> 1820                         start_mm = new_start_mm;
                                                   >> 1821                 }
                                                   >> 1822                 if (retval) {
                                                   >> 1823                         unlock_page(page);
                                                   >> 1824                         put_page(page);
                                                   >> 1825                         break;
2406                 }                                1826                 }
2407                                                  1827 
2408                 /*                               1828                 /*
2409                  * Make sure that we aren't completely killing !! 1829                  * the page in the swap cache; but try_to_unmap could
2410                  * interactive performance.   !! 1830                  * the page in the swap cache; but try_to_unmap could
2411                  */                           !! 1831                  * then re-duplicate the entry once we drop page lock,
2412                 cond_resched();               !! 1832                  * so we might loop indefinitely; also, that page could
2413                 spin_lock(&mmlist_lock);      !! 1833                  * not be swapped out to other storage meanwhile.  So:
2414         }                                     !! 1834                  * delete from cache even if there's another reference,
2415         spin_unlock(&mmlist_lock);            !! 1835                  * after ensuring that the data has been saved to disk -
2416                                               !! 1836                  * since if the reference remains (rarer), it will be
2417         mmput(prev_mm);                       !! 1837                  * read from disk into another page.  Splitting into two
                                                   >> 1838                  * pages would be incorrect if swap supported "shared
                                                   >> 1839                  * private" pages, but they are handled by tmpfs files.
                                                   >> 1840                  *
                                                   >> 1841                  * Given how unuse_vma() targets one particular offset
                                                   >> 1842                  * in an anon_vma, once the anon_vma has been determined,
                                                   >> 1843                  * this splitting happens to be just what is needed to
                                                   >> 1844                  * handle where KSM pages have been swapped out: re-reading
                                                   >> 1845                  * is unnecessarily slow, but we can fix that later on.
                                                   >> 1846                  */
                                                   >> 1847                 if (swap_count(*swap_map) &&
                                                   >> 1848                      PageDirty(page) && PageSwapCache(page)) {
                                                   >> 1849                         struct writeback_control wbc = {
                                                   >> 1850                                 .sync_mode = WB_SYNC_NONE,
                                                   >> 1851                         };
2418                                                  1852 
2419         i = 0;                                !! 1853                         swap_writepage(page, &wbc);
2420         while (READ_ONCE(si->inuse_pages) &&  !! 1854                         lock_page(page);
2421                !signal_pending(current) &&    !! 1855                         wait_on_page_writeback(page);
2422                (i = find_next_to_unuse(si, i)) != 0) { !! 1856                 }
2423                                                  1857 
2424                 entry = swp_entry(type, i);   !! 1858                 /*
2425                 folio = filemap_get_folio(swap_address_space(entry), swap_cache_index(entry)); !! 1859                  * It is conceivable that a racing task removed this page from
2426                 if (IS_ERR(folio))            !! 1860                  * swap cache just before we acquired the page lock at the top,
2427                         continue;             !! 1861                  * or while we dropped it in unuse_mm().  The page might even
                                                   >> 1862                  * be back in swap cache on another swap area: that we must not
                                                   >> 1863                  * delete, since it may not have been written out to swap yet.
                                                   >> 1864                  */
                                                   >> 1865                 if (PageSwapCache(page) &&
                                                   >> 1866                     likely(page_private(page) == entry.val))
                                                   >> 1867                         delete_from_swap_cache(page);
2428                                                  1868 
2429                 /*                               1869                 /*
2430                  * It is conceivable that a racing task removed this folio from !! 1870                  * So we could skip searching mms once swap count went
2431                  * swap cache just before we acquired the page lock. The folio !! 1871                  * to 1, we did not mark any present ptes as dirty: must
2432                  * might even be back in swap cache on another swap area: !! 1872                  * mark page dirty so shrink_page_list will preserve it.
2433                  * that is okay, folio_free_swap() only removes stale folios. !! 1873                  */
2434                  */                           !! 1874                 SetPageDirty(page);
2435                 folio_lock(folio);            !! 1875                 unlock_page(page);
2436                 folio_wait_writeback(folio);  !! 1876                 put_page(page);
2437                 folio_free_swap(folio);       << 
2438                 folio_unlock(folio);          << 
2439                 folio_put(folio);             << 
2440         }                                     << 
2441                                                  1877 
2442         /*                                    !! 1878                 /*
2443          * Lets check again to see if there are still swap entries in the map. !! 1879                  * Make sure that we aren't completely killing
2444          * If yes, we would need to do retry the unuse logic again. !! 1880                  * interactive performance.
2445          * Under global memory pressure, swap entries can be reinserted back !! 1881                  */
2446          * into process space after the mmlist loop above passes over them. !! 1882                 cond_resched();
2447          *                                    !! 1883                 if (frontswap && pages_to_unuse > 0) {
2448          * Limit the number of retries? No: when mmget_not_zero() !! 1884                         if (!--pages_to_unuse)
2449          * above fails, that mm is likely to be freeing swap from !! 1885                                 break;
2450          * exit_mmap(), which proceeds at its own independent pace; !! 1886                 }
2451          * and even shmem_writepage() could have been preempted after << 
2452          * folio_alloc_swap(), temporarily hiding that swap.  It's easy << 
2453          * and robust (though cpu-intensive) just to keep retrying. << 
2454          */                                   << 
2455         if (READ_ONCE(si->inuse_pages)) {     << 
2456                 if (!signal_pending(current)) << 
2457                         goto retry;           << 
2458                 return -EINTR;                << 
2459         }                                        1887         }
2460                                                  1888 
2461 success:                                      !! 1889         mmput(start_mm);
2462         /*                                    !! 1890         return retval;
2463          * Make sure that further cleanups after try_to_unuse() returns happen << 
2464          * after swap_range_free() reduces si->inuse_pages to 0. << 
2465          */                                   << 
2466         smp_mb();                             << 
2467         return 0;                             << 
2468 }                                                1891 }
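
The rewritten try_to_unuse() above walks every mm on init_mm.mmlist and then
every remaining swap-cache folio, instead of the old start_mm heuristics. The
mmlist walk is a classic pin/drop-lock/work pattern: grab a failable
reference, drop the list lock for the real work, and keep the previous
element pinned so the list cursor stays valid. Below is a minimal standalone
sketch of that pattern (userspace C11 with pthreads; struct node and the
get_not_zero()/put() helpers are hypothetical stand-ins for mm_struct,
mmget_not_zero() and mmput(), not kernel APIs):

	#include <pthread.h>
	#include <stdatomic.h>

	struct node {
		atomic_int refs;        /* 0 means the owner is tearing it down */
		struct node *next;      /* circular list, like init_mm.mmlist */
	};

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct node head = { .next = &head };   /* never freed, like init_mm */

	static int get_not_zero(struct node *n)        /* cf. mmget_not_zero() */
	{
		int old = atomic_load(&n->refs);

		while (old != 0)
			if (atomic_compare_exchange_weak(&n->refs, &old, old + 1))
				return 1;
		return 0;               /* already exiting: caller must skip it */
	}

	static void put(struct node *n)                /* cf. mmput() */
	{
		atomic_fetch_sub(&n->refs, 1);  /* a real version would free at zero */
	}

	static void walk(void (*work)(struct node *))
	{
		struct node *prev = &head, *p;

		atomic_fetch_add(&head.refs, 1);       /* cf. mmget(prev_mm) */
		pthread_mutex_lock(&list_lock);
		for (p = head.next; p != &head; p = p->next) {
			if (!get_not_zero(p))
				continue;              /* stayed under the lock */
			pthread_mutex_unlock(&list_lock);
			put(prev);
			prev = p;
			if (work)
				work(p);               /* cf. unuse_mm(mm, type) */
			pthread_mutex_lock(&list_lock);
		}
		pthread_mutex_unlock(&list_lock);
		put(prev);
	}

	int main(void)
	{
		walk(0);        /* empty list: just takes and drops the lock */
		return 0;
	}

Holding a reference across the unlocked region is what makes the subsequent
p = p->next step safe: a pinned node cannot be freed or unlinked, so its next
pointer is still valid once the lock is re-taken.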
2469                                                  1892 
2470 /*                                               1893 /*
2471  * After a successful try_to_unuse, if no swa    1894  * After a successful try_to_unuse, if no swap is now in use, we know
2472  * we can empty the mmlist.  swap_lock must b    1895  * we can empty the mmlist.  swap_lock must be held on entry and exit.
2473  * Note that mmlist_lock nests inside swap_lo    1896  * Note that mmlist_lock nests inside swap_lock, and an mm must be
2474  * added to the mmlist just after page_duplic    1897  * added to the mmlist just after page_duplicate - before would be racy.
2475  */                                              1898  */
2476 static void drain_mmlist(void)                   1899 static void drain_mmlist(void)
2477 {                                                1900 {
2478         struct list_head *p, *next;              1901         struct list_head *p, *next;
2479         unsigned int type;                       1902         unsigned int type;
2480                                                  1903 
2481         for (type = 0; type < nr_swapfiles; t    1904         for (type = 0; type < nr_swapfiles; type++)
2482                 if (swap_info[type]->inuse_pa    1905                 if (swap_info[type]->inuse_pages)
2483                         return;                  1906                         return;
2484         spin_lock(&mmlist_lock);                 1907         spin_lock(&mmlist_lock);
2485         list_for_each_safe(p, next, &init_mm.    1908         list_for_each_safe(p, next, &init_mm.mmlist)
2486                 list_del_init(p);                1909                 list_del_init(p);
2487         spin_unlock(&mmlist_lock);               1910         spin_unlock(&mmlist_lock);
2488 }                                                1911 }
2489                                                  1912 
2490 /*                                               1913 /*
                                                   >> 1914  * Use this swapdev's extent info to locate the (PAGE_SIZE) block which
                                                   >> 1915  * corresponds to page offset for the specified swap entry.
                                                   >> 1916  * Note that the type of this function is sector_t, but it returns page offset
                                                   >> 1917  * into the bdev, not sector offset.
                                                   >> 1918  */
                                                   >> 1919 static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
                                                   >> 1920 {
                                                   >> 1921         struct swap_info_struct *sis;
                                                   >> 1922         struct swap_extent *start_se;
                                                   >> 1923         struct swap_extent *se;
                                                   >> 1924         pgoff_t offset;
                                                   >> 1925 
                                                   >> 1926         sis = swap_info[swp_type(entry)];
                                                   >> 1927         *bdev = sis->bdev;
                                                   >> 1928 
                                                   >> 1929         offset = swp_offset(entry);
                                                   >> 1930         start_se = sis->curr_swap_extent;
                                                   >> 1931         se = start_se;
                                                   >> 1932 
                                                   >> 1933         for ( ; ; ) {
                                                   >> 1934                 if (se->start_page <= offset &&
                                                   >> 1935                                 offset < (se->start_page + se->nr_pages)) {
                                                   >> 1936                         return se->start_block + (offset - se->start_page);
                                                   >> 1937                 }
                                                   >> 1938                 se = list_next_entry(se, list);
                                                   >> 1939                 sis->curr_swap_extent = se;
                                                   >> 1940                 BUG_ON(se == start_se);         /* It *must* be present */
                                                   >> 1941         }
                                                   >> 1942 }
                                                   >> 1943 
                                                   >> 1944 /*
                                                   >> 1945  * Returns the page offset into bdev for the specified page's swap entry.
                                                   >> 1946  */
                                                   >> 1947 sector_t map_swap_page(struct page *page, struct block_device **bdev)
                                                   >> 1948 {
                                                   >> 1949         swp_entry_t entry;
                                                   >> 1950         entry.val = page_private(page);
                                                   >> 1951         return map_swap_entry(entry, bdev);
                                                   >> 1952 }
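
Whether implemented as the cached list search above (4.12) or as an rbtree
walk (6.12), the extent lookup computes the same thing: find the extent whose
page range covers the swap offset, then add the offset's distance into that
range to the extent's start block. A standalone sketch under hypothetical
types (userspace C; struct extent here is a stand-in for the kernel's
swap_extent, not the real structure):

	#include <assert.h>
	#include <stddef.h>

	struct extent {                 /* hypothetical stand-in for swap_extent */
		unsigned long start_page;
		unsigned long nr_pages;
		unsigned long long start_block;
	};

	/* Translate a swap page offset to a disk block, as the lookups here do. */
	static unsigned long long lookup(const struct extent *ext, size_t n,
					 unsigned long offset)
	{
		size_t i;

		for (i = 0; i < n; i++)
			if (ext[i].start_page <= offset &&
			    offset < ext[i].start_page + ext[i].nr_pages)
				return ext[i].start_block + (offset - ext[i].start_page);
		assert(!"a swap offset is always covered by some extent");
		return 0;
	}

	int main(void)
	{
		/* A fragmented swapfile: pages 0-99 at blocks 1000+, 100-149 at 5000+ */
		struct extent file_map[] = { { 0, 100, 1000 }, { 100, 50, 5000 } };
		/* An S_ISBLK device gets one identity extent, so offset == block. */
		struct extent bdev_map[] = { { 0, 1UL << 20, 0 } };

		assert(lookup(file_map, 2, 42) == 1042);
		assert(lookup(file_map, 2, 120) == 5020);
		assert(lookup(bdev_map, 1, 7) == 7);
		return 0;
	}

The old code additionally caches the last hit in curr_swap_extent, which the
comment below credits with about 0.3 iterations per page; the 6.12 rbtree
replaces that cache with an O(log n) search.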
                                                   >> 1953 
                                                   >> 1954 /*
2491  * Free all of a swapdev's extent information    1955  * Free all of a swapdev's extent information
2492  */                                              1956  */
2493 static void destroy_swap_extents(struct swap_    1957 static void destroy_swap_extents(struct swap_info_struct *sis)
2494 {                                                1958 {
2495         while (!RB_EMPTY_ROOT(&sis->swap_extent_root)) { !! 1959         while (!list_empty(&sis->first_swap_extent.list)) {
2496                 struct rb_node *rb = sis->swap_extent_root.rb_node; !! 1960                 struct swap_extent *se;
2497                 struct swap_extent *se = rb_entry(rb, struct swap_extent, rb_node); << 
2498                                                  1961 
2499                 rb_erase(rb, &sis->swap_extent_root); !! 1962                 se = list_first_entry(&sis->first_swap_extent.list,
                                                   >> 1963                                 struct swap_extent, list);
                                                   >> 1964                 list_del(&se->list);
2500                 kfree(se);                       1965                 kfree(se);
2501         }                                        1966         }
2502                                                  1967 
2503         if (sis->flags & SWP_ACTIVATED) {     !! 1968         if (sis->flags & SWP_FILE) {
2504                 struct file *swap_file = sis-    1969                 struct file *swap_file = sis->swap_file;
2505                 struct address_space *mapping    1970                 struct address_space *mapping = swap_file->f_mapping;
2506                                                  1971 
2507                 sis->flags &= ~SWP_ACTIVATED; !! 1972                 sis->flags &= ~SWP_FILE;
2508                 if (mapping->a_ops->swap_deactivate) !! 1973                 mapping->a_ops->swap_deactivate(swap_file);
2509                         mapping->a_ops->swap_deactivate(swap_file); << 
2510         }                                        1974         }
2511 }                                                1975 }
2512                                                  1976 
2513 /*                                               1977 /*
2514  * Add a block range (and the corresponding p    1978  * Add a block range (and the corresponding page range) into this swapdev's
2515  * extent tree.                               !! 1979  * extent list.  The extent list is kept sorted in page order.
2516  *                                               1980  *
2517  * This function rather assumes that it is ca    1981  * This function rather assumes that it is called in ascending page order.
2518  */                                              1982  */
2519 int                                              1983 int
2520 add_swap_extent(struct swap_info_struct *sis,    1984 add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
2521                 unsigned long nr_pages, secto    1985                 unsigned long nr_pages, sector_t start_block)
2522 {                                                1986 {
2523         struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL; << 
2524         struct swap_extent *se;                  1987         struct swap_extent *se;
2525         struct swap_extent *new_se;              1988         struct swap_extent *new_se;
                                                   >> 1989         struct list_head *lh;
2526                                                  1990 
2527         /*                                    !! 1991         if (start_page == 0) {
2528          * place the new node at the right most since the !! 1992                 se = &sis->first_swap_extent;
2529          * function is called in ascending page order. !! 1993                 sis->curr_swap_extent = se;
2530          */                                   !! 1994                 se->start_page = 0;
2531         while (*link) {                       !! 1995                 se->nr_pages = nr_pages;
2532                 parent = *link;               !! 1996                 se->start_block = start_block;
2533                 link = &parent->rb_right;     !! 1997                 return 1;
2534         }                                     !! 1998         } else {
2535                                               !! 1999                 lh = sis->first_swap_extent.list.prev;  /* Highest extent */
2536         if (parent) {                         !! 2000                 se = list_entry(lh, struct swap_extent, list);
2537                 se = rb_entry(parent, struct swap_extent, rb_node); << 
2538                 BUG_ON(se->start_page + se->n    2001                 BUG_ON(se->start_page + se->nr_pages != start_page);
2539                 if (se->start_block + se->nr_    2002                 if (se->start_block + se->nr_pages == start_block) {
2540                         /* Merge it */           2003                         /* Merge it */
2541                         se->nr_pages += nr_pa    2004                         se->nr_pages += nr_pages;
2542                         return 0;                2005                         return 0;
2543                 }                                2006                 }
2544         }                                        2007         }
2545                                                  2008 
2546         /* No merge, insert a new extent. */  !! 2009         /*
                                                   >> 2010          * No merge.  Insert a new extent, preserving ordering.
                                                   >> 2011          */
2547         new_se = kmalloc(sizeof(*se), GFP_KER    2012         new_se = kmalloc(sizeof(*se), GFP_KERNEL);
2548         if (new_se == NULL)                      2013         if (new_se == NULL)
2549                 return -ENOMEM;                  2014                 return -ENOMEM;
2550         new_se->start_page = start_page;         2015         new_se->start_page = start_page;
2551         new_se->nr_pages = nr_pages;             2016         new_se->nr_pages = nr_pages;
2552         new_se->start_block = start_block;       2017         new_se->start_block = start_block;
2553                                                  2018 
2554         rb_link_node(&new_se->rb_node, parent, link); !! 2019         list_add_tail(&new_se->list, &sis->first_swap_extent.list);
2555         rb_insert_color(&new_se->rb_node, &sis->swap_extent_root); << 
2556         return 1;                                2020         return 1;
2557 }                                                2021 }
2558 EXPORT_SYMBOL_GPL(add_swap_extent);           << 
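
The merge test above deserves spelling out: because add_swap_extent() is
called in ascending page order, only the right-most extent can ever grow, and
it grows only when the new range continues the disk-block run as well as the
page run. A standalone sketch of just that rule (userspace C, a hypothetical
array-backed store in place of the rbtree/list):

	#include <assert.h>

	struct ext {                    /* hypothetical stand-in for swap_extent */
		unsigned long start_page;
		unsigned long nr_pages;
		unsigned long long start_block;
	};

	/* Same rule as add_swap_extent(): merge iff both runs continue. */
	static int add_extent(struct ext *v, int *n, unsigned long start_page,
			      unsigned long nr_pages, unsigned long long start_block)
	{
		if (*n > 0) {
			struct ext *last = &v[*n - 1];

			assert(last->start_page + last->nr_pages == start_page);
			if (last->start_block + last->nr_pages == start_block) {
				last->nr_pages += nr_pages;     /* merge */
				return 0;
			}
		}
		v[*n] = (struct ext){ start_page, nr_pages, start_block };
		(*n)++;
		return 1;
	}

	int main(void)
	{
		struct ext v[4];
		int n = 0;

		add_extent(v, &n, 0, 10, 100);
		add_extent(v, &n, 10, 5, 110);  /* contiguous on disk: merged */
		add_extent(v, &n, 15, 5, 999);  /* disk gap: new extent */
		assert(n == 2 && v[0].nr_pages == 15 && v[1].start_block == 999);
		return 0;
	}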
2559                                                  2022 
2560 /*                                               2023 /*
2561  * A `swap extent' is a simple thing which ma    2024  * A `swap extent' is a simple thing which maps a contiguous range of pages
2562  * onto a contiguous range of disk blocks.  A rbtree of swap extents is !! 2025  * onto a contiguous range of disk blocks.  An ordered list of swap extents
2563  * built at swapon time and is then used at swap_writepage/swap_read_folio !! 2026  * is built at swapon time and is then used at swap_writepage/swap_readpage
2564  * time for locating where on disk a page bel    2027  * time for locating where on disk a page belongs.
2565  *                                               2028  *
2566  * If the swapfile is an S_ISBLK block device    2029  * If the swapfile is an S_ISBLK block device, a single extent is installed.
2567  * This is done so that the main operating co    2030  * This is done so that the main operating code can treat S_ISBLK and S_ISREG
2568  * swap files identically.                       2031  * swap files identically.
2569  *                                               2032  *
2570  * Whether the swapdev is an S_ISREG file or     2033  * Whether the swapdev is an S_ISREG file or an S_ISBLK blockdev, the swap
2571  * extent rbtree operates in PAGE_SIZE disk blocks.  Both S_ISREG and S_ISBLK !! 2034  * extent list operates in PAGE_SIZE disk blocks.  Both S_ISREG and S_ISBLK
2572  * swapfiles are handled *identically* after     2035  * swapfiles are handled *identically* after swapon time.
2573  *                                               2036  *
2574  * For S_ISREG swapfiles, setup_swap_extents(    2037  * For S_ISREG swapfiles, setup_swap_extents() will walk all the file's blocks
2575  * and will parse them into a rbtree, in PAGE_SIZE chunks.  If some stray !! 2038  * and will parse them into an ordered extent list, in PAGE_SIZE chunks.  If
2576  * blocks are found which do not fall within the PAGE_SIZE alignment !! 2039  * some stray blocks are found which do not fall within the PAGE_SIZE alignment
2577  * requirements, they are simply tossed out -    2040  * requirements, they are simply tossed out - we will never use those blocks
2578  * for swapping.                                 2041  * for swapping.
2579  *                                               2042  *
2580  * For all swap devices we set S_SWAPFILE across the life of the swapon.  This !! 2043  * For S_ISREG swapfiles we set S_SWAPFILE across the life of the swapon.  This
2581  * prevents users from writing to the swap device, which will corrupt memory. !! 2044  * prevents root from shooting her foot off by ftruncating an in-use swapfile,
                                                   >> 2045  * which will scribble on the fs.
2582  *                                               2046  *
2583  * The amount of disk space which a single sw    2047  * The amount of disk space which a single swap extent represents varies.
2584  * Typically it is in the 1-4 megabyte range.    2048  * Typically it is in the 1-4 megabyte range.  So we can have hundreds of
2585  * extents in the rbtree. - akpm.             !! 2049  * extents in the list.  To avoid much list walking, we cache the previous
                                                   >> 2050  * search location in `curr_swap_extent', and start new searches from there.
                                                   >> 2051  * This is extremely effective.  The average number of iterations in
                                                   >> 2052  * map_swap_page() has been measured at about 0.3 per page.  - akpm.
2586  */                                              2053  */
2587 static int setup_swap_extents(struct swap_inf    2054 static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
2588 {                                                2055 {
2589         struct file *swap_file = sis->swap_fi    2056         struct file *swap_file = sis->swap_file;
2590         struct address_space *mapping = swap_    2057         struct address_space *mapping = swap_file->f_mapping;
2591         struct inode *inode = mapping->host;     2058         struct inode *inode = mapping->host;
2592         int ret;                                 2059         int ret;
2593                                                  2060 
2594         if (S_ISBLK(inode->i_mode)) {            2061         if (S_ISBLK(inode->i_mode)) {
2595                 ret = add_swap_extent(sis, 0,    2062                 ret = add_swap_extent(sis, 0, sis->max, 0);
2596                 *span = sis->pages;              2063                 *span = sis->pages;
2597                 return ret;                      2064                 return ret;
2598         }                                        2065         }
2599                                                  2066 
2600         if (mapping->a_ops->swap_activate) {     2067         if (mapping->a_ops->swap_activate) {
2601                 ret = mapping->a_ops->swap_ac    2068                 ret = mapping->a_ops->swap_activate(sis, swap_file, span);
2602                 if (ret < 0)                  !! 2069                 if (!ret) {
2603                         return ret;           !! 2070                         sis->flags |= SWP_FILE;
2604                 sis->flags |= SWP_ACTIVATED;  !! 2071                         ret = add_swap_extent(sis, 0, sis->max, 0);
2605                 if ((sis->flags & SWP_FS_OPS) && << 
2606                     sio_pool_init() != 0) {   << 
2607                         destroy_swap_extents(sis); << 
2608                         return -ENOMEM;       << 
2609                 }                                2073                 }
2610                 return ret;                      2074                 return ret;
2611         }                                        2075         }
2612                                                  2076 
2613         return generic_swapfile_activate(sis,    2077         return generic_swapfile_activate(sis, swap_file, span);
2614 }                                                2078 }
2615                                                  2079 
2616 static int swap_node(struct swap_info_struct *si) !! 2080 static void _enable_swap_info(struct swap_info_struct *p, int prio,
2617 {                                             !! 2081                                 unsigned char *swap_map,
2618         struct block_device *bdev;            !! 2082                                 struct swap_cluster_info *cluster_info)
2619                                               << 
2620         if (si->bdev)                         << 
2621                 bdev = si->bdev;              << 
2622         else                                  << 
2623                 bdev = si->swap_file->f_inode->i_sb->s_bdev; << 
2624                                               << 
2625         return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE; << 
2626 }                                             << 
2627                                               << 
2628 static void setup_swap_info(struct swap_info_struct *si, int prio, << 
2629                             unsigned char *swap_map, << 
2630                             struct swap_cluster_info *cluster_info, << 
2631                             unsigned long *zeromap) << 
2632 {                                                2083 {
2633         int i;                                << 
2634                                               << 
2635         if (prio >= 0)                           2084         if (prio >= 0)
2636                 si->prio = prio;              !! 2085                 p->prio = prio;
2637         else                                     2086         else
2638                 si->prio = --least_priority;  !! 2087                 p->prio = --least_priority;
2639         /*                                       2088         /*
2640          * the plist prio is negated because     2089          * the plist prio is negated because plist ordering is
2641          * low-to-high, while swap ordering i    2090          * low-to-high, while swap ordering is high-to-low
2642          */                                      2091          */
2643         si->list.prio = -si->prio;            !! 2092         p->list.prio = -p->prio;
2644         for_each_node(i) {                    !! 2093         p->avail_list.prio = -p->prio;
2645                 if (si->prio >= 0)            !! 2094         p->swap_map = swap_map;
2646                         si->avail_lists[i].prio = -si->prio; !! 2094         p->swap_map = swap_map;
2647                 else {                        !! 2096         p->flags |= SWP_WRITEOK;
2648                         if (swap_node(si) == i) !! 2096         p->flags |= SWP_WRITEOK;
2649                                 si->avail_lists[i].prio = 1; !! 2097         atomic_long_add(p->pages, &nr_swap_pages);
2650                         else                  << 
2651                                 si->avail_lists[i].prio = -si->prio; << 
2652                 }                             << 
2653         }                                     << 
2654         si->swap_map = swap_map;              << 
2655         si->cluster_info = cluster_info;      << 
2656         si->zeromap = zeromap;                << 
2657 }                                             << 
2658                                               << 
2659 static void _enable_swap_info(struct swap_info_struct *si) << 
2660 {                                             << 
2661         si->flags |= SWP_WRITEOK;             << 
2662         atomic_long_add(si->pages, &nr_swap_pages); << 
2663         total_swap_pages += si->pages;        << 
2664                                                  2099 
2665         assert_spin_locked(&swap_lock);          2100         assert_spin_locked(&swap_lock);
2666         /*                                       2101         /*
2667          * both lists are plists, and thus pr    2102          * both lists are plists, and thus priority ordered.
2668          * swap_active_head needs to be prior    2103          * swap_active_head needs to be priority ordered for swapoff(),
2669          * which on removal of any swap_info_    2104          * which on removal of any swap_info_struct with an auto-assigned
2670          * (i.e. negative) priority increment    2105          * (i.e. negative) priority increments the auto-assigned priority
2671          * of any lower-priority swap_info_st    2106          * of any lower-priority swap_info_structs.
2672          * swap_avail_head needs to be priority ordered for folio_alloc_swap(), !! 2107          * swap_avail_head needs to be priority ordered for get_swap_page(),
2673          * which allocates swap pages from th    2108          * which allocates swap pages from the highest available priority
2674          * swap_info_struct.                     2109          * swap_info_struct.
2675          */                                      2110          */
2676         plist_add(&si->list, &swap_active_head); !! 2111         plist_add(&p->list, &swap_active_head);
2677                                               !! 2112         spin_lock(&swap_avail_lock);
2678         /* add to available list iff swap device is not full */ !! 2113         plist_add(&p->avail_list, &swap_avail_head);
2679         if (si->highest_bit)                  !! 2114         spin_unlock(&swap_avail_lock);
2680                 add_to_avail_list(si);        << 
2681 }                                                2115 }
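
The negation when filling list.prio (and, in 6.12, each avail_lists[node].prio)
exists because plists sort ascending while swap allocation wants the
highest-priority device first; storing -prio puts that device at the plist
head. A standalone sketch of the equivalence (userspace C, with qsort()
standing in for plist insertion order):

	#include <assert.h>
	#include <stdlib.h>

	static int ascending(const void *a, const void *b)
	{
		return *(const int *)a - *(const int *)b;  /* plists sort ascending */
	}

	int main(void)
	{
		int swap_prio[] = { 5, -2, 10 };   /* priorities as swapon sees them */
		int plist_prio[3];
		int i;

		for (i = 0; i < 3; i++)
			plist_prio[i] = -swap_prio[i];     /* what gets stored */
		qsort(plist_prio, 3, sizeof(int), ascending);
		assert(-plist_prio[0] == 10);      /* list head = highest priority */
		assert(-plist_prio[2] == -2);      /* tail = lowest (auto-assigned) */
		return 0;
	}

setup_swap_info() above applies the same trick per NUMA node, except that for
auto-assigned (negative) priorities the device's home node stores 1, which
sorts ahead of the others' negated priorities, so each node prefers its local
device.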
2682                                                  2116 
2683 static void enable_swap_info(struct swap_info_struct *si, int prio, !! 2117 static void enable_swap_info(struct swap_info_struct *p, int prio,
2684                                 unsigned char    2118                                 unsigned char *swap_map,
2685                                 struct swap_c    2119                                 struct swap_cluster_info *cluster_info,
2686                                 unsigned long *zeromap) !! 2120                                 unsigned long *frontswap_map)
2687 {                                                2121 {
                                                   >> 2122         frontswap_init(p->type, frontswap_map);
2688         spin_lock(&swap_lock);                   2123         spin_lock(&swap_lock);
2689         spin_lock(&si->lock);                 !! 2124         spin_lock(&p->lock);
2690         setup_swap_info(si, prio, swap_map, cluster_info, zeromap); !! 2125          _enable_swap_info(p, prio, swap_map, cluster_info);
2691         spin_unlock(&si->lock);               !! 2126         spin_unlock(&p->lock);
2692         spin_unlock(&swap_lock);              << 
2693         /*                                    << 
2694          * Finished initializing swap device, now it's safe to reference it. << 
2695          */                                   << 
2696         percpu_ref_resurrect(&si->users);     << 
2697         spin_lock(&swap_lock);                << 
2698         spin_lock(&si->lock);                 << 
2699         _enable_swap_info(si);                << 
2700         spin_unlock(&si->lock);               << 
2701         spin_unlock(&swap_lock);                 2127         spin_unlock(&swap_lock);
2702 }                                                2128 }
2703                                                  2129 
2704 static void reinsert_swap_info(struct swap_info_struct *si) !! 2130 static void reinsert_swap_info(struct swap_info_struct *p)
2705 {                                                2131 {
2706         spin_lock(&swap_lock);                   2132         spin_lock(&swap_lock);
2707         spin_lock(&si->lock);                 !! 2133         spin_lock(&p->lock);
2708         setup_swap_info(si, si->prio, si->swap_map, si->cluster_info, si->zeromap); !! 2134         _enable_swap_info(p, p->prio, p->swap_map, p->cluster_info);
2709         _enable_swap_info(si);                !! 2135         spin_unlock(&p->lock);
2710         spin_unlock(&si->lock);               << 
2711         spin_unlock(&swap_lock);                 2136         spin_unlock(&swap_lock);
2712 }                                                2137 }
2713                                                  2138 
2714 static bool __has_usable_swap(void)           << 
2715 {                                             << 
2716         return !plist_head_empty(&swap_active_head); << 
2717 }                                             << 
2718                                               << 
2719 bool has_usable_swap(void)                       2139 bool has_usable_swap(void)
2720 {                                                2140 {
2721         bool ret;                             !! 2141         bool ret = true;
2722                                                  2142 
2723         spin_lock(&swap_lock);                   2143         spin_lock(&swap_lock);
2724         ret = __has_usable_swap();            !! 2144         if (plist_head_empty(&swap_active_head))
                                                   >> 2145                 ret = false;
2725         spin_unlock(&swap_lock);                 2146         spin_unlock(&swap_lock);
2726         return ret;                              2147         return ret;
2727 }                                                2148 }
2728                                                  2149 
2729 SYSCALL_DEFINE1(swapoff, const char __user *,    2150 SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
2730 {                                                2151 {
2731         struct swap_info_struct *p = NULL;       2152         struct swap_info_struct *p = NULL;
2732         unsigned char *swap_map;                 2153         unsigned char *swap_map;
2733         unsigned long *zeromap;               << 
2734         struct swap_cluster_info *cluster_inf    2154         struct swap_cluster_info *cluster_info;
                                                   >> 2155         unsigned long *frontswap_map;
2735         struct file *swap_file, *victim;         2156         struct file *swap_file, *victim;
2736         struct address_space *mapping;           2157         struct address_space *mapping;
2737         struct inode *inode;                     2158         struct inode *inode;
2738         struct filename *pathname;               2159         struct filename *pathname;
2739         int err, found = 0;                      2160         int err, found = 0;
                                                   >> 2161         unsigned int old_block_size;
2740                                                  2162 
2741         if (!capable(CAP_SYS_ADMIN))             2163         if (!capable(CAP_SYS_ADMIN))
2742                 return -EPERM;                   2164                 return -EPERM;
2743                                                  2165 
2744         BUG_ON(!current->mm);                    2166         BUG_ON(!current->mm);
2745                                                  2167 
2746         pathname = getname(specialfile);         2168         pathname = getname(specialfile);
2747         if (IS_ERR(pathname))                    2169         if (IS_ERR(pathname))
2748                 return PTR_ERR(pathname);        2170                 return PTR_ERR(pathname);
2749                                                  2171 
2750         victim = file_open_name(pathname, O_R    2172         victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0);
2751         err = PTR_ERR(victim);                   2173         err = PTR_ERR(victim);
2752         if (IS_ERR(victim))                      2174         if (IS_ERR(victim))
2753                 goto out;                        2175                 goto out;
2754                                                  2176 
2755         mapping = victim->f_mapping;             2177         mapping = victim->f_mapping;
2756         spin_lock(&swap_lock);                   2178         spin_lock(&swap_lock);
2757         plist_for_each_entry(p, &swap_active_    2179         plist_for_each_entry(p, &swap_active_head, list) {
2758                 if (p->flags & SWP_WRITEOK) {    2180                 if (p->flags & SWP_WRITEOK) {
2759                         if (p->swap_file->f_m    2181                         if (p->swap_file->f_mapping == mapping) {
2760                                 found = 1;       2182                                 found = 1;
2761                                 break;           2183                                 break;
2762                         }                        2184                         }
2763                 }                                2185                 }
2764         }                                        2186         }
2765         if (!found) {                            2187         if (!found) {
2766                 err = -EINVAL;                   2188                 err = -EINVAL;
2767                 spin_unlock(&swap_lock);         2189                 spin_unlock(&swap_lock);
2768                 goto out_dput;                   2190                 goto out_dput;
2769         }                                        2191         }
2770         if (!security_vm_enough_memory_mm(cur    2192         if (!security_vm_enough_memory_mm(current->mm, p->pages))
2771                 vm_unacct_memory(p->pages);      2193                 vm_unacct_memory(p->pages);
2772         else {                                   2194         else {
2773                 err = -ENOMEM;                   2195                 err = -ENOMEM;
2774                 spin_unlock(&swap_lock);         2196                 spin_unlock(&swap_lock);
2775                 goto out_dput;                   2197                 goto out_dput;
2776         }                                        2198         }
                                                   >> 2199         spin_lock(&swap_avail_lock);
                                                   >> 2200         plist_del(&p->avail_list, &swap_avail_head);
                                                   >> 2201         spin_unlock(&swap_avail_lock);
2777         spin_lock(&p->lock);                     2202         spin_lock(&p->lock);
2778         del_from_avail_list(p);               << 
2779         if (p->prio < 0) {                       2203         if (p->prio < 0) {
2780                 struct swap_info_struct *si =    2204                 struct swap_info_struct *si = p;
2781                 int nid;                      << 
2782                                                  2205 
2783                 plist_for_each_entry_continue    2206                 plist_for_each_entry_continue(si, &swap_active_head, list) {
2784                         si->prio++;              2207                         si->prio++;
2785                         si->list.prio--;         2208                         si->list.prio--;
2786                         for_each_node(nid) {  !! 2209                         si->avail_list.prio--;
2787                                 if (si->avail_lists[nid].prio != 1) << 
2788                                         si->avail_lists[nid].prio--; << 
2789                         }                     << 
2790                 }                                2210                 }
2791                 least_priority++;                2211                 least_priority++;
2792         }                                        2212         }
2793         plist_del(&p->list, &swap_active_head    2213         plist_del(&p->list, &swap_active_head);
2794         atomic_long_sub(p->pages, &nr_swap_pa    2214         atomic_long_sub(p->pages, &nr_swap_pages);
2795         total_swap_pages -= p->pages;            2215         total_swap_pages -= p->pages;
2796         p->flags &= ~SWP_WRITEOK;                2216         p->flags &= ~SWP_WRITEOK;
2797         spin_unlock(&p->lock);                   2217         spin_unlock(&p->lock);
2798         spin_unlock(&swap_lock);                 2218         spin_unlock(&swap_lock);
2799                                                  2219 
2800         disable_swap_slots_cache_lock();         2220         disable_swap_slots_cache_lock();
2801                                                  2221 
2802         set_current_oom_origin();                2222         set_current_oom_origin();
2803         err = try_to_unuse(p->type);          !! 2223         err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
2804         clear_current_oom_origin();              2224         clear_current_oom_origin();
2805                                                  2225 
2806         if (err) {                               2226         if (err) {
2807                 /* re-insert swap space back     2227                 /* re-insert swap space back into swap_list */
2808                 reinsert_swap_info(p);           2228                 reinsert_swap_info(p);
2809                 reenable_swap_slots_cache_unl    2229                 reenable_swap_slots_cache_unlock();
2810                 goto out_dput;                   2230                 goto out_dput;
2811         }                                        2231         }
2812                                                  2232 
2813         reenable_swap_slots_cache_unlock();      2233         reenable_swap_slots_cache_unlock();
2814                                                  2234 
2815         /*                                    << 
2816          * Wait for swap operations protected by get/put_swap_device() << 
2817          * to complete.  Because of synchronize_rcu() here, all swap << 
2818          * operations protected by RCU reader side lock (including any << 
2819          * spinlock) will be waited too.  This makes it easy to << 
2820          * prevent folio_test_swapcache() and the following swap cache << 
2821          * operations from racing with swapoff. << 
2822          */                                   << 
2823         percpu_ref_kill(&p->users);           << 
2824         synchronize_rcu();                    << 
2825         wait_for_completion(&p->comp);        << 
2826                                               << 
2827         flush_work(&p->discard_work);            2235         flush_work(&p->discard_work);
2828         flush_work(&p->reclaim_work);         << 
2829                                                  2236 
2830         destroy_swap_extents(p);                 2237         destroy_swap_extents(p);
2831         if (p->flags & SWP_CONTINUED)            2238         if (p->flags & SWP_CONTINUED)
2832                 free_swap_count_continuations    2239                 free_swap_count_continuations(p);
2833                                                  2240 
2834         if (!p->bdev || !bdev_nonrot(p->bdev)) << 
2835                 atomic_dec(&nr_rotate_swap);  << 
2836                                               << 
2837         mutex_lock(&swapon_mutex);               2241         mutex_lock(&swapon_mutex);
2838         spin_lock(&swap_lock);                   2242         spin_lock(&swap_lock);
2839         spin_lock(&p->lock);                     2243         spin_lock(&p->lock);
2840         drain_mmlist();                          2244         drain_mmlist();
2841                                                  2245 
2842         /* wait for anyone still in scan_swap_map_slots */ !! 2246         /* wait for anyone still in scan_swap_map */
2843         p->highest_bit = 0;             /* cu    2247         p->highest_bit = 0;             /* cuts scans short */
2844         while (p->flags >= SWP_SCANNING) {       2248         while (p->flags >= SWP_SCANNING) {
2845                 spin_unlock(&p->lock);           2249                 spin_unlock(&p->lock);
2846                 spin_unlock(&swap_lock);         2250                 spin_unlock(&swap_lock);
2847                 schedule_timeout_uninterrupti    2251                 schedule_timeout_uninterruptible(1);
2848                 spin_lock(&swap_lock);           2252                 spin_lock(&swap_lock);
2849                 spin_lock(&p->lock);             2253                 spin_lock(&p->lock);
2850         }                                        2254         }
2851                                                  2255 
2852         swap_file = p->swap_file;                2256         swap_file = p->swap_file;
                                                   >> 2257         old_block_size = p->old_block_size;
2853         p->swap_file = NULL;                     2258         p->swap_file = NULL;
2854         p->max = 0;                              2259         p->max = 0;
2855         swap_map = p->swap_map;                  2260         swap_map = p->swap_map;
2856         p->swap_map = NULL;                      2261         p->swap_map = NULL;
2857         zeromap = p->zeromap;                 << 
2858         p->zeromap = NULL;                    << 
2859         cluster_info = p->cluster_info;          2262         cluster_info = p->cluster_info;
2860         p->cluster_info = NULL;                  2263         p->cluster_info = NULL;
                                                   >> 2264         frontswap_map = frontswap_map_get(p);
2861         spin_unlock(&p->lock);                   2265         spin_unlock(&p->lock);
2862         spin_unlock(&swap_lock);                 2266         spin_unlock(&swap_lock);
2863         arch_swap_invalidate_area(p->type);   !! 2267         frontswap_invalidate_area(p->type);
2864         zswap_swapoff(p->type);               !! 2268         frontswap_map_set(p, NULL);
2865         mutex_unlock(&swapon_mutex);             2269         mutex_unlock(&swapon_mutex);
2866         free_percpu(p->percpu_cluster);          2270         free_percpu(p->percpu_cluster);
2867         p->percpu_cluster = NULL;                2271         p->percpu_cluster = NULL;
2868         free_percpu(p->cluster_next_cpu);     << 
2869         p->cluster_next_cpu = NULL;           << 
2870         vfree(swap_map);                         2272         vfree(swap_map);
2871         kvfree(zeromap);                      << 
2872         kvfree(cluster_info);                    2273         kvfree(cluster_info);
                                                   >> 2274         kvfree(frontswap_map);
2873         /* Destroy swap account information *    2275         /* Destroy swap account information */
2874         swap_cgroup_swapoff(p->type);            2276         swap_cgroup_swapoff(p->type);
2875         exit_swap_address_space(p->type);        2277         exit_swap_address_space(p->type);
2876                                                  2278 
2877         inode = mapping->host;                   2279         inode = mapping->host;
2878                                               !! 2280         if (S_ISBLK(inode->i_mode)) {
2879         inode_lock(inode);                    !! 2281                 struct block_device *bdev = I_BDEV(inode);
2880         inode->i_flags &= ~S_SWAPFILE;        !! 2282                 set_blocksize(bdev, old_block_size);
2881         inode_unlock(inode);                  !! 2283                 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
                                                   >> 2284         } else {
                                                   >> 2285                 inode_lock(inode);
                                                   >> 2286                 inode->i_flags &= ~S_SWAPFILE;
                                                   >> 2287                 inode_unlock(inode);
                                                   >> 2288         }
2882         filp_close(swap_file, NULL);             2289         filp_close(swap_file, NULL);
2883                                                  2290 
2884         /*                                       2291         /*
2885          * Clear the SWP_USED flag after all     2292          * Clear the SWP_USED flag after all resources are freed so that swapon
2886          * can reuse this swap_info in alloc_    2293          * can reuse this swap_info in alloc_swap_info() safely.  It is ok to
2887          * not hold p->lock after we cleared     2294          * not hold p->lock after we cleared its SWP_WRITEOK.
2888          */                                      2295          */
2889         spin_lock(&swap_lock);                   2296         spin_lock(&swap_lock);
2890         p->flags = 0;                            2297         p->flags = 0;
2891         spin_unlock(&swap_lock);                 2298         spin_unlock(&swap_lock);
2892                                                  2299 
2893         err = 0;                                 2300         err = 0;
2894         atomic_inc(&proc_poll_event);            2301         atomic_inc(&proc_poll_event);
2895         wake_up_interruptible(&proc_poll_wait    2302         wake_up_interruptible(&proc_poll_wait);
2896                                                  2303 
2897 out_dput:                                        2304 out_dput:
2898         filp_close(victim, NULL);                2305         filp_close(victim, NULL);
2899 out:                                             2306 out:
2900         putname(pathname);                       2307         putname(pathname);
2901         return err;                              2308         return err;
2902 }                                                2309 }
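
From userspace this entry point is reached through the swapoff(2) wrapper. A
hedged usage sketch ("/dev/sdb2" is a placeholder, not from this file): the
caller needs CAP_SYS_ADMIN (EPERM otherwise), gets EINVAL if the path is not
an active swap area, and, with the retrying try_to_unuse() above, may see
EINTR if a signal arrives while in-use pages are still being pulled back in:

	#include <errno.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/swap.h>

	int main(void)
	{
		if (swapoff("/dev/sdb2") != 0) {        /* placeholder device */
			fprintf(stderr, "swapoff: %s\n", strerror(errno));
			return 1;
		}
		return 0;
	}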
2903                                                  2310 
2904 #ifdef CONFIG_PROC_FS                            2311 #ifdef CONFIG_PROC_FS
2905 static __poll_t swaps_poll(struct file *file, poll_table *wait) !! 2312 static unsigned swaps_poll(struct file *file, poll_table *wait)
2906 {                                                2313 {
2907         struct seq_file *seq = file->private_    2314         struct seq_file *seq = file->private_data;
2908                                                  2315 
2909         poll_wait(file, &proc_poll_wait, wait    2316         poll_wait(file, &proc_poll_wait, wait);
2910                                                  2317 
2911         if (seq->poll_event != atomic_read(&p    2318         if (seq->poll_event != atomic_read(&proc_poll_event)) {
2912                 seq->poll_event = atomic_read    2319                 seq->poll_event = atomic_read(&proc_poll_event);
2913                 return EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI; !! 2320                 return POLLIN | POLLRDNORM | POLLERR | POLLPRI;
2914         }                                        2321         }
2915                                                  2322 
2916         return EPOLLIN | EPOLLRDNORM;         !! 2323         return POLLIN | POLLRDNORM;
2917 }                                                2324 }
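
swaps_poll() always reports /proc/swaps readable, and additionally raises the
exceptional bits once proc_poll_event has been bumped by a swapon or swapoff.
A monitor therefore waits for POLLERR|POLLPRI and re-reads the table; a
minimal userspace sketch (assuming only that /proc/swaps exists):

	#include <fcntl.h>
	#include <poll.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		struct pollfd pfd = {
			.fd = open("/proc/swaps", O_RDONLY),
			.events = POLLPRI,      /* wait for a table change */
		};

		if (pfd.fd < 0)
			return 1;
		while (poll(&pfd, 1, -1) > 0) {
			if (pfd.revents & (POLLERR | POLLPRI)) {
				/* a swap device came or went: re-read the table */
				ssize_t n = pread(pfd.fd, buf, sizeof(buf), 0);

				if (n > 0)
					fwrite(buf, 1, n, stdout);
			}
		}
		return 0;
	}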
2918                                                  2325 
2919 /* iterator */                                   2326 /* iterator */
2920 static void *swap_start(struct seq_file *swap    2327 static void *swap_start(struct seq_file *swap, loff_t *pos)
2921 {                                                2328 {
2922         struct swap_info_struct *si;             2329         struct swap_info_struct *si;
2923         int type;                                2330         int type;
2924         loff_t l = *pos;                         2331         loff_t l = *pos;
2925                                                  2332 
2926         mutex_lock(&swapon_mutex);               2333         mutex_lock(&swapon_mutex);
2927                                                  2334 
2928         if (!l)                                  2335         if (!l)
2929                 return SEQ_START_TOKEN;          2336                 return SEQ_START_TOKEN;
2930                                                  2337 
2931         for (type = 0; (si = swap_type_to_swap_info(type)); type++) {  !! 2338         for (type = 0; type < nr_swapfiles; type++) {
                                                   >> 2339                 smp_rmb();      /* read nr_swapfiles before swap_info[type] */
                                                   >> 2340                 si = swap_info[type];
2932                 if (!(si->flags & SWP_USED) |    2341                 if (!(si->flags & SWP_USED) || !si->swap_map)
2933                         continue;                2342                         continue;
2934                 if (!--l)                        2343                 if (!--l)
2935                         return si;               2344                         return si;
2936         }                                        2345         }
2937                                                  2346 
2938         return NULL;                             2347         return NULL;
2939 }                                                2348 }
2940                                                  2349 
2941 static void *swap_next(struct seq_file *swap,    2350 static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
2942 {                                                2351 {
2943         struct swap_info_struct *si = v;         2352         struct swap_info_struct *si = v;
2944         int type;                                2353         int type;
2945                                                  2354 
2946         if (v == SEQ_START_TOKEN)                2355         if (v == SEQ_START_TOKEN)
2947                 type = 0;                        2356                 type = 0;
2948         else                                     2357         else
2949                 type = si->type + 1;             2358                 type = si->type + 1;
2950                                                  2359 
2951         ++(*pos);                             !! 2360         for (; type < nr_swapfiles; type++) {
2952         for (; (si = swap_type_to_swap_info(type)); type++) {  !! 2361                 smp_rmb();      /* read nr_swapfiles before swap_info[type] */
                                                   >> 2362                 si = swap_info[type];
2953                 if (!(si->flags & SWP_USED) |    2363                 if (!(si->flags & SWP_USED) || !si->swap_map)
2954                         continue;                2364                         continue;
                                                   >> 2365                 ++*pos;
2955                 return si;                       2366                 return si;
2956         }                                        2367         }
2957                                                  2368 
2958         return NULL;                             2369         return NULL;
2959 }                                                2370 }
2960                                                  2371 
2961 static void swap_stop(struct seq_file *swap,     2372 static void swap_stop(struct seq_file *swap, void *v)
2962 {                                                2373 {
2963         mutex_unlock(&swapon_mutex);             2374         mutex_unlock(&swapon_mutex);
2964 }                                                2375 }
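
    [Editor's note: swap_start()/swap_next()/swap_stop() follow the standard
    seq_file iterator contract: start() may hand back SEQ_START_TOKEN for a
    header row, next() advances *pos before looking up the following element,
    and stop() releases the lock taken in start(). A self-contained sketch of
    the same contract over an invented static array -- the demo_* names and
    data are illustrative, not kernel code:

    #include <linux/kernel.h>
    #include <linux/seq_file.h>

    static int demo_items[] = { 1, 2, 3 };

    static void *demo_start(struct seq_file *m, loff_t *pos)
    {
            if (!*pos)
                    return SEQ_START_TOKEN;         /* header row, like swap_start() */
            if (*pos <= ARRAY_SIZE(demo_items))
                    return &demo_items[*pos - 1];
            return NULL;
    }

    static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
    {
            ++*pos;                                 /* advance before the lookup */
            if (*pos <= ARRAY_SIZE(demo_items))
                    return &demo_items[*pos - 1];
            return NULL;
    }

    static void demo_stop(struct seq_file *m, void *v)
    {
            /* a real iterator would drop the lock taken in demo_start() */
    }

    static int demo_show(struct seq_file *m, void *v)
    {
            if (v == SEQ_START_TOKEN)
                    seq_puts(m, "value\n");
            else
                    seq_printf(m, "%d\n", *(int *)v);
            return 0;
    }

    static const struct seq_operations demo_ops = {
            .start = demo_start,
            .next  = demo_next,
            .stop  = demo_stop,
            .show  = demo_show,
    };
    ]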
2965                                                  2376 
2966 static int swap_show(struct seq_file *swap, v    2377 static int swap_show(struct seq_file *swap, void *v)
2967 {                                                2378 {
2968         struct swap_info_struct *si = v;         2379         struct swap_info_struct *si = v;
2969         struct file *file;                       2380         struct file *file;
2970         int len;                                 2381         int len;
2971         unsigned long bytes, inuse;           << 
2972                                                  2382 
2973         if (si == SEQ_START_TOKEN) {             2383         if (si == SEQ_START_TOKEN) {
2974                 seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n");  !! 2384                 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
2975                 return 0;                        2385                 return 0;
2976         }                                        2386         }
2977                                                  2387 
2978         bytes = K(si->pages);                 << 
2979         inuse = K(READ_ONCE(si->inuse_pages));  <<
2980                                               << 
2981         file = si->swap_file;                    2388         file = si->swap_file;
2982         len = seq_file_path(swap, file, " \t\    2389         len = seq_file_path(swap, file, " \t\n\\");
2983         seq_printf(swap, "%*s%s\t%lu\t%s%lu\t%s%d\n",  !! 2390         seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
2984                         len < 40 ? 40 - len :    2391                         len < 40 ? 40 - len : 1, " ",
2985                         S_ISBLK(file_inode(fi    2392                         S_ISBLK(file_inode(file)->i_mode) ?
2986                                 "partition" :    2393                                 "partition" : "file\t",
2987                         bytes, bytes < 10000000 ? "\t" : "",  !! 2394                         si->pages << (PAGE_SHIFT - 10),
2988                         inuse, inuse < 10000000 ? "\t" : "",  !! 2395                         si->inuse_pages << (PAGE_SHIFT - 10),
2989                         si->prio);               2396                         si->prio);
2990         return 0;                                2397         return 0;
2991 }                                                2398 }
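
    [Editor's note: for reference, swap_show() renders /proc/swaps roughly
    like this -- illustrative values, not taken from a real system:

    Filename                                Type            Size            Used            Priority
    /dev/vda2                               partition       8388604         0               -2
    /swapfile                               file            2097148         0               -3

    The bytes < 10000000 ? "\t" : "" ternaries insert an extra tab after
    narrow Size/Used values, so the Priority column stays aligned whether an
    area is megabytes or terabytes.]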
2992                                                  2399 
2993 static const struct seq_operations swaps_op =    2400 static const struct seq_operations swaps_op = {
2994         .start =        swap_start,              2401         .start =        swap_start,
2995         .next =         swap_next,               2402         .next =         swap_next,
2996         .stop =         swap_stop,               2403         .stop =         swap_stop,
2997         .show =         swap_show                2404         .show =         swap_show
2998 };                                               2405 };
2999                                                  2406 
3000 static int swaps_open(struct inode *inode, st    2407 static int swaps_open(struct inode *inode, struct file *file)
3001 {                                                2408 {
3002         struct seq_file *seq;                    2409         struct seq_file *seq;
3003         int ret;                                 2410         int ret;
3004                                                  2411 
3005         ret = seq_open(file, &swaps_op);         2412         ret = seq_open(file, &swaps_op);
3006         if (ret)                                 2413         if (ret)
3007                 return ret;                      2414                 return ret;
3008                                                  2415 
3009         seq = file->private_data;                2416         seq = file->private_data;
3010         seq->poll_event = atomic_read(&proc_p    2417         seq->poll_event = atomic_read(&proc_poll_event);
3011         return 0;                                2418         return 0;
3012 }                                                2419 }
3013                                                  2420 
3014 static const struct proc_ops swaps_proc_ops = {  !! 2421 static const struct file_operations proc_swaps_operations = {
3015         .proc_flags     = PROC_ENTRY_PERMANENT,  !! 2422         .open           = swaps_open,
3016         .proc_open      = swaps_open,         !! 2423         .read           = seq_read,
3017         .proc_read      = seq_read,           !! 2424         .llseek         = seq_lseek,
3018         .proc_lseek     = seq_lseek,          !! 2425         .release        = seq_release,
3019         .proc_release   = seq_release,        !! 2426         .poll           = swaps_poll,
3020         .proc_poll      = swaps_poll,         << 
3021 };                                               2427 };
3022                                                  2428 
3023 static int __init procswaps_init(void)           2429 static int __init procswaps_init(void)
3024 {                                                2430 {
3025         proc_create("swaps", 0, NULL, &swaps_proc_ops);  !! 2431         proc_create("swaps", 0, NULL, &proc_swaps_operations);
3026         return 0;                                2432         return 0;
3027 }                                                2433 }
3028 __initcall(procswaps_init);                      2434 __initcall(procswaps_init);
3029 #endif /* CONFIG_PROC_FS */                      2435 #endif /* CONFIG_PROC_FS */
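
    [Editor's note: the left column reflects the v5.6 procfs interface
    change, where proc entries register a struct proc_ops instead of a
    struct file_operations, and PROC_ENTRY_PERMANENT marks entries that are
    never unregistered. The shape of such a registration, as an illustrative
    sketch -- all demo_* names are invented:

    #include <linux/init.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_show(struct seq_file *m, void *v)
    {
            seq_puts(m, "hello from /proc/demo\n");
            return 0;
    }

    static int demo_open(struct inode *inode, struct file *file)
    {
            return single_open(file, demo_show, NULL);
    }

    static const struct proc_ops demo_proc_ops = {
            .proc_flags     = PROC_ENTRY_PERMANENT, /* entry is never removed */
            .proc_open      = demo_open,
            .proc_read      = seq_read,
            .proc_lseek     = seq_lseek,
            .proc_release   = single_release,
    };

    static int __init demo_init(void)
    {
            proc_create("demo", 0, NULL, &demo_proc_ops);
            return 0;
    }
    __initcall(demo_init);
    ]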
3030                                                  2436 
3031 #ifdef MAX_SWAPFILES_CHECK                       2437 #ifdef MAX_SWAPFILES_CHECK
3032 static int __init max_swapfiles_check(void)      2438 static int __init max_swapfiles_check(void)
3033 {                                                2439 {
3034         MAX_SWAPFILES_CHECK();                   2440         MAX_SWAPFILES_CHECK();
3035         return 0;                                2441         return 0;
3036 }                                                2442 }
3037 late_initcall(max_swapfiles_check);              2443 late_initcall(max_swapfiles_check);
3038 #endif                                           2444 #endif
3039                                                  2445 
3040 static struct swap_info_struct *alloc_swap_in    2446 static struct swap_info_struct *alloc_swap_info(void)
3041 {                                                2447 {
3042         struct swap_info_struct *p;              2448         struct swap_info_struct *p;
3043         struct swap_info_struct *defer = NULL;  <<
3044         unsigned int type;                       2449         unsigned int type;
3045         int i;                                << 
3046                                                  2450 
3047         p = kvzalloc(struct_size(p, avail_lists, nr_node_ids), GFP_KERNEL);  !! 2451         p = kzalloc(sizeof(*p), GFP_KERNEL);
3048         if (!p)                                  2452         if (!p)
3049                 return ERR_PTR(-ENOMEM);         2453                 return ERR_PTR(-ENOMEM);
3050                                                  2454 
3051         if (percpu_ref_init(&p->users, swap_users_ref_free,  <<
3052                             PERCPU_REF_INIT_DEAD, GFP_KERNEL)) {  <<
3053                 kvfree(p);                    << 
3054                 return ERR_PTR(-ENOMEM);      << 
3055         }                                     << 
3056                                               << 
3057         spin_lock(&swap_lock);                   2455         spin_lock(&swap_lock);
3058         for (type = 0; type < nr_swapfiles; t    2456         for (type = 0; type < nr_swapfiles; type++) {
3059                 if (!(swap_info[type]->flags     2457                 if (!(swap_info[type]->flags & SWP_USED))
3060                         break;                   2458                         break;
3061         }                                        2459         }
3062         if (type >= MAX_SWAPFILES) {             2460         if (type >= MAX_SWAPFILES) {
3063                 spin_unlock(&swap_lock);         2461                 spin_unlock(&swap_lock);
3064                 percpu_ref_exit(&p->users);   !! 2462                 kfree(p);
3065                 kvfree(p);                    << 
3066                 return ERR_PTR(-EPERM);          2463                 return ERR_PTR(-EPERM);
3067         }                                        2464         }
3068         if (type >= nr_swapfiles) {              2465         if (type >= nr_swapfiles) {
3069                 p->type = type;                  2466                 p->type = type;
                                                   >> 2467                 swap_info[type] = p;
3070                 /*                               2468                 /*
3071                  * Publish the swap_info_struct after initializing it.  !! 2469                  * Write swap_info[type] before nr_swapfiles, in case a
3072                  * Note that kvzalloc() above zeroes out the flags.  !! 2470                  * racing procfs swap_start() or swap_next() is reading them.
                                                   >> 2471                  * (We never shrink nr_swapfiles, we never free this entry.)
3073                  */                              2472                  */
3074                 smp_store_release(&swap_info[type], p); /* rcu_assign_pointer() */  !! 2473                 smp_wmb();
3075                 nr_swapfiles++;                  2474                 nr_swapfiles++;
3076         } else {                                 2475         } else {
3077                 defer = p;                    !! 2476                 kfree(p);
3078                 p = swap_info[type];             2477                 p = swap_info[type];
3079                 /*                               2478                 /*
3080                  * Do not memset this entry:     2479                  * Do not memset this entry: a racing procfs swap_next()
3081                  * would be relying on p->typ    2480                  * would be relying on p->type to remain valid.
3082                  */                              2481                  */
3083         }                                        2482         }
3084         p->swap_extent_root = RB_ROOT;        !! 2483         INIT_LIST_HEAD(&p->first_swap_extent.list);
3085         plist_node_init(&p->list, 0);            2484         plist_node_init(&p->list, 0);
3086         for_each_node(i)                      !! 2485         plist_node_init(&p->avail_list, 0);
3087                 plist_node_init(&p->avail_lists[i], 0);  <<
3088         p->flags = SWP_USED;                     2486         p->flags = SWP_USED;
3089         spin_unlock(&swap_lock);                 2487         spin_unlock(&swap_lock);
3090         if (defer) {                          << 
3091                 percpu_ref_exit(&defer->users);  <<
3092                 kvfree(defer);                << 
3093         }                                     << 
3094         spin_lock_init(&p->lock);                2488         spin_lock_init(&p->lock);
3095         spin_lock_init(&p->cont_lock);        << 
3096         init_completion(&p->comp);            << 
3097                                                  2489 
3098         return p;                                2490         return p;
3099 }                                                2491 }
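
    [Editor's note: the interesting change above is how swap_info[type] is
    published. The old code ordered the stores with smp_wmb(); the new code
    uses smp_store_release(), which readers pair with smp_load_acquire() in
    swap_type_to_swap_info(), guaranteeing they never observe a
    half-initialised entry. The pattern in isolation, as a sketch with
    invented demo_* names:

    #include <linux/atomic.h>

    struct demo_info {
            int initialised;
    };

    static struct demo_info *demo_slot;     /* the published pointer */

    static void demo_publish(struct demo_info *p)
    {
            p->initialised = 1;                     /* fully set up first ...   */
            smp_store_release(&demo_slot, p);       /* ... then make it visible */
    }

    static struct demo_info *demo_lookup(void)
    {
            /* Pairs with the release above; a non-NULL result is guaranteed
             * to be fully initialised, which is what swap_type_to_swap_info()
             * relies on when racing with alloc_swap_info(). */
            return smp_load_acquire(&demo_slot);
    }
    ]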
3100                                                  2492 
3101 static int claim_swapfile(struct swap_info_struct *si, struct inode *inode)  !! 2493 static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
3102 {                                                2494 {
                                                   >> 2495         int error;
                                                   >> 2496 
3103         if (S_ISBLK(inode->i_mode)) {            2497         if (S_ISBLK(inode->i_mode)) {
3104                 si->bdev = I_BDEV(inode);     !! 2498                 p->bdev = bdgrab(I_BDEV(inode));
3105                 /*                            !! 2499                 error = blkdev_get(p->bdev,
3106                  * Zoned block devices contain zones that have a sequential  !! 2500                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
3107                  * write only restriction.  Hence zoned block devices are not  !! 2501                 if (error < 0) {
3108                  * suitable for swapping.  Disallow them here.  !! 2502                         p->bdev = NULL;
3109                  */                           !! 2503                         return error;
3110                 if (bdev_is_zoned(si->bdev))  !! 2504                 }
3111                         return -EINVAL;       !! 2505                 p->old_block_size = block_size(p->bdev);
3112                 si->flags |= SWP_BLKDEV;      !! 2506                 error = set_blocksize(p->bdev, PAGE_SIZE);
                                                   >> 2507                 if (error < 0)
                                                   >> 2508                         return error;
                                                   >> 2509                 p->flags |= SWP_BLKDEV;
3113         } else if (S_ISREG(inode->i_mode)) {     2510         } else if (S_ISREG(inode->i_mode)) {
3114                 si->bdev = inode->i_sb->s_bdev;  !! 2511                 p->bdev = inode->i_sb->s_bdev;
3115         }                                     !! 2512                 inode_lock(inode);
                                                   >> 2513                 if (IS_SWAPFILE(inode))
                                                   >> 2514                         return -EBUSY;
                                                   >> 2515         } else
                                                   >> 2516                 return -EINVAL;
3116                                                  2517 
3117         return 0;                                2518         return 0;
3118 }                                                2519 }
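
    [Editor's note: claim_swapfile() now only records the backing block
    device and rejects zoned devices; the exclusive blkdev_get() claim the
    old code took here is no longer done at this point in newer kernels. The
    device-selection logic in isolation, as a sketch -- demo_pick_bdev() is
    invented, while I_BDEV() and bdev_is_zoned() are real helpers:

    #include <linux/blkdev.h>
    #include <linux/err.h>
    #include <linux/fs.h>

    /* Swap I/O always needs a block device: the device itself for a block
     * special file, the filesystem's backing device for a regular file. */
    static struct block_device *demo_pick_bdev(struct inode *inode)
    {
            if (S_ISBLK(inode->i_mode)) {
                    struct block_device *bdev = I_BDEV(inode);

                    /* sequential-write-only zones cannot back swap */
                    if (bdev_is_zoned(bdev))
                            return ERR_PTR(-EINVAL);
                    return bdev;
            }
            if (S_ISREG(inode->i_mode))
                    return inode->i_sb->s_bdev;
            return NULL;
    }
    ]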
3119                                                  2520 
3120                                               !! 2521 static unsigned long read_swap_header(struct swap_info_struct *p,
3121 /*                                            <<
3122  * Find out how many pages are allowed for a single swap device. There  <<
3123  * are two limiting factors:                  <<
3124  * 1) the number of bits for the swap offset in the swp_entry_t type, and  <<
3125  * 2) the number of bits in the swap pte, as defined by the different  <<
3126  * architectures.                             <<
3127  *                                            <<
3128  * In order to find the largest possible bit mask, a swap entry with  <<
3129  * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,  <<
3130  * decoded to a swp_entry_t again, and finally the swap offset is  <<
3131  * extracted.                                 <<
3132  *                                            <<
3133  * This will mask all the bits from the initial ~0UL mask that can't  <<
3134  * be encoded in either the swp_entry_t or the architecture definition  <<
3135  * of a swap pte.                             <<
3136  */                                           <<
3137 unsigned long generic_max_swapfile_size(void)  <<
3138 {                                             <<
3139         return swp_offset(pte_to_swp_entry(   <<
3140                         swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;  <<
3141 }                                             <<
3142                                               <<
3143 /* Can be overridden by an architecture for additional checks. */  <<
3144 __weak unsigned long arch_max_swapfile_size(void)  <<
3145 {                                             <<
3146         return generic_max_swapfile_size();   <<
3147 }                                             <<
3148                                               << 
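
    [Editor's note: the encode/decode round trip above is easier to see with
    concrete numbers. A userspace model, assuming a hypothetical
    architecture whose swap pte keeps 27 offset bits -- the demo_* names and
    the bit budget are invented for illustration:

    #include <stdio.h>

    #define DEMO_OFFSET_BITS 27     /* hypothetical per-arch swap-pte budget */

    static unsigned long demo_swp_entry_to_pte(unsigned long offset)
    {
            /* the lossy step: only DEMO_OFFSET_BITS survive in the pte */
            return offset & ((1UL << DEMO_OFFSET_BITS) - 1);
    }

    static unsigned long demo_pte_to_swp_offset(unsigned long pte)
    {
            return pte;
    }

    int main(void)
    {
            unsigned long max_offset =
                    demo_pte_to_swp_offset(demo_swp_entry_to_pte(~0UL));

            /* offsets run 0..max_offset, hence the "+ 1" page count */
            printf("max swap pages: %lu\n", max_offset + 1);
            return 0;
    }
    ]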
3149 static unsigned long read_swap_header(struct swap_info_struct *si,  <<
3150                                         union    2522                                         union swap_header *swap_header,
3151                                         struc    2523                                         struct inode *inode)
3152 {                                                2524 {
3153         int i;                                   2525         int i;
3154         unsigned long maxpages;                  2526         unsigned long maxpages;
3155         unsigned long swapfilepages;             2527         unsigned long swapfilepages;
3156         unsigned long last_page;                 2528         unsigned long last_page;
3157                                                  2529 
3158         if (memcmp("SWAPSPACE2", swap_header-    2530         if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
3159                 pr_err("Unable to find swap-s    2531                 pr_err("Unable to find swap-space signature\n");
3160                 return 0;                        2532                 return 0;
3161         }                                        2533         }
3162                                                  2534 
3163         /* swap partition endianness hack... */  !! 2535         /* swap partition endianess hack... */
3164         if (swab32(swap_header->info.version)    2536         if (swab32(swap_header->info.version) == 1) {
3165                 swab32s(&swap_header->info.ve    2537                 swab32s(&swap_header->info.version);
3166                 swab32s(&swap_header->info.la    2538                 swab32s(&swap_header->info.last_page);
3167                 swab32s(&swap_header->info.nr    2539                 swab32s(&swap_header->info.nr_badpages);
3168                 if (swap_header->info.nr_badp    2540                 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
3169                         return 0;                2541                         return 0;
3170                 for (i = 0; i < swap_header->    2542                 for (i = 0; i < swap_header->info.nr_badpages; i++)
3171                         swab32s(&swap_header-    2543                         swab32s(&swap_header->info.badpages[i]);
3172         }                                        2544         }
3173         /* Check the swap header's sub-versio    2545         /* Check the swap header's sub-version */
3174         if (swap_header->info.version != 1) {    2546         if (swap_header->info.version != 1) {
3175                 pr_warn("Unable to handle swa    2547                 pr_warn("Unable to handle swap header version %d\n",
3176                         swap_header->info.ver    2548                         swap_header->info.version);
3177                 return 0;                        2549                 return 0;
3178         }                                        2550         }
3179                                                  2551 
3180         si->lowest_bit  = 1;                  !! 2552         p->lowest_bit  = 1;
3181         si->cluster_next = 1;                 !! 2553         p->cluster_next = 1;
3182         si->cluster_nr = 0;                   !! 2554         p->cluster_nr = 0;
3183                                               !! 2555 
3184         maxpages = swapfile_maximum_size;     !! 2556         /*
                                                   >> 2557          * Find out how many pages are allowed for a single swap
                                                   >> 2558          * device. There are two limiting factors: 1) the number
                                                   >> 2559          * of bits for the swap offset in the swp_entry_t type, and
                                                   >> 2560          * 2) the number of bits in the swap pte as defined by the
                                                   >> 2561          * different architectures. In order to find the
                                                   >> 2562          * largest possible bit mask, a swap entry with swap type 0
                                                   >> 2563          * and swap offset ~0UL is created, encoded to a swap pte,
                                                   >> 2564          * decoded to a swp_entry_t again, and finally the swap
                                                   >> 2565          * offset is extracted. This will mask all the bits from
                                                   >> 2566          * the initial ~0UL mask that can't be encoded in either
                                                   >> 2567          * the swp_entry_t or the architecture definition of a
                                                   >> 2568          * swap pte.
                                                   >> 2569          */
                                                   >> 2570         maxpages = swp_offset(pte_to_swp_entry(
                                                   >> 2571                         swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
3185         last_page = swap_header->info.last_pa    2572         last_page = swap_header->info.last_page;
3186         if (!last_page) {                     << 
3187                 pr_warn("Empty swap-file\n"); << 
3188                 return 0;                     << 
3189         }                                     << 
3190         if (last_page > maxpages) {              2573         if (last_page > maxpages) {
3191                 pr_warn("Truncating oversized    2574                 pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
3192                         K(maxpages), K(last_page));  !! 2575                         maxpages << (PAGE_SHIFT - 10),
                                                   >> 2576                         last_page << (PAGE_SHIFT - 10));
3193         }                                        2577         }
3194         if (maxpages > last_page) {              2578         if (maxpages > last_page) {
3195                 maxpages = last_page + 1;        2579                 maxpages = last_page + 1;
3196                 /* p->max is an unsigned int:    2580                 /* p->max is an unsigned int: don't overflow it */
3197                 if ((unsigned int)maxpages ==    2581                 if ((unsigned int)maxpages == 0)
3198                         maxpages = UINT_MAX;     2582                         maxpages = UINT_MAX;
3199         }                                        2583         }
3200         si->highest_bit = maxpages - 1;       !! 2584         p->highest_bit = maxpages - 1;
3201                                                  2585 
3202         if (!maxpages)                           2586         if (!maxpages)
3203                 return 0;                        2587                 return 0;
3204         swapfilepages = i_size_read(inode) >>    2588         swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
3205         if (swapfilepages && maxpages > swapf    2589         if (swapfilepages && maxpages > swapfilepages) {
3206                 pr_warn("Swap area shorter th    2590                 pr_warn("Swap area shorter than signature indicates\n");
3207                 return 0;                        2591                 return 0;
3208         }                                        2592         }
3209         if (swap_header->info.nr_badpages &&     2593         if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
3210                 return 0;                        2594                 return 0;
3211         if (swap_header->info.nr_badpages > M    2595         if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
3212                 return 0;                        2596                 return 0;
3213                                                  2597 
3214         return maxpages;                         2598         return maxpages;
3215 }                                                2599 }
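
    [Editor's note: the "endianness hack" above works because a version
    field written as 1 by an opposite-endian mkswap reads back as
    0x01000000, so swab32(version) == 1 is a reliable byte-swap detector,
    after which every header field gets swabbed in place. A userspace model
    of the check -- demo_swab32() is a local reimplementation for
    illustration:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t demo_swab32(uint32_t x)
    {
            return  (x >> 24) | ((x >> 8) & 0x0000ff00) |
                    ((x << 8) & 0x00ff0000) | (x << 24);
    }

    int main(void)
    {
            /* version "1" as laid out by an opposite-endian mkswap */
            uint32_t version = 0x01000000;

            if (version != 1 && demo_swab32(version) == 1) {
                    version = demo_swab32(version);
                    printf("byte-swapped header detected, version=%u\n", version);
            }
            return 0;
    }
    ]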
3216                                                  2600 
3217 #define SWAP_CLUSTER_INFO_COLS                   2601 #define SWAP_CLUSTER_INFO_COLS                                          \
3218         DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(s    2602         DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
3219 #define SWAP_CLUSTER_SPACE_COLS                  2603 #define SWAP_CLUSTER_SPACE_COLS                                         \
3220         DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES    2604         DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER)
3221 #define SWAP_CLUSTER_COLS                        2605 #define SWAP_CLUSTER_COLS                                               \
3222         max_t(unsigned int, SWAP_CLUSTER_INFO    2606         max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)
3223                                                  2607 
3224 static int setup_swap_map_and_extents(struct swap_info_struct *si,  !! 2608 static int setup_swap_map_and_extents(struct swap_info_struct *p,
3225                                         union    2609                                         union swap_header *swap_header,
3226                                         unsig    2610                                         unsigned char *swap_map,
                                                   >> 2611                                         struct swap_cluster_info *cluster_info,
3227                                         unsig    2612                                         unsigned long maxpages,
3228                                         secto    2613                                         sector_t *span)
3229 {                                                2614 {
                                                   >> 2615         unsigned int j, k;
3230         unsigned int nr_good_pages;              2616         unsigned int nr_good_pages;
3231         unsigned long i;                      << 
3232         int nr_extents;                          2617         int nr_extents;
                                                   >> 2618         unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
                                                   >> 2619         unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS;
                                                   >> 2620         unsigned long i, idx;
3233                                                  2621 
3234         nr_good_pages = maxpages - 1;   /* om    2622         nr_good_pages = maxpages - 1;   /* omit header page */
3235                                                  2623 
                                                   >> 2624         cluster_list_init(&p->free_clusters);
                                                   >> 2625         cluster_list_init(&p->discard_clusters);
                                                   >> 2626 
3236         for (i = 0; i < swap_header->info.nr_    2627         for (i = 0; i < swap_header->info.nr_badpages; i++) {
3237                 unsigned int page_nr = swap_h    2628                 unsigned int page_nr = swap_header->info.badpages[i];
3238                 if (page_nr == 0 || page_nr >    2629                 if (page_nr == 0 || page_nr > swap_header->info.last_page)
3239                         return -EINVAL;          2630                         return -EINVAL;
3240                 if (page_nr < maxpages) {        2631                 if (page_nr < maxpages) {
3241                         swap_map[page_nr] = S    2632                         swap_map[page_nr] = SWAP_MAP_BAD;
3242                         nr_good_pages--;         2633                         nr_good_pages--;
                                                   >> 2634                         /*
                                                   >> 2635                          * Haven't marked the cluster free yet, no list
                                                   >> 2636                          * operation involved
                                                   >> 2637                          */
                                                   >> 2638                         inc_cluster_info_page(p, cluster_info, page_nr);
3243                 }                                2639                 }
3244         }                                        2640         }
3245                                                  2641 
                                                   >> 2642         /* Haven't marked the cluster free yet, no list operation involved */
                                                   >> 2643         for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++)
                                                   >> 2644                 inc_cluster_info_page(p, cluster_info, i);
                                                   >> 2645 
3246         if (nr_good_pages) {                     2646         if (nr_good_pages) {
3247                 swap_map[0] = SWAP_MAP_BAD;      2647                 swap_map[0] = SWAP_MAP_BAD;
3248                 si->max = maxpages;           !! 2648                 /*
3249                 si->pages = nr_good_pages;    !! 2649                  * Not mark the cluster free yet, no list
3250                 nr_extents = setup_swap_extents(si, span);  !! 2655                 nr_extents = setup_swap_extents(p, span);
                                                   >> 2651                  */
                                                   >> 2652                 inc_cluster_info_page(p, cluster_info, 0);
                                                   >> 2653                 p->max = maxpages;
                                                   >> 2654                 p->pages = nr_good_pages;
                                                   >> 2655                 nr_extents = setup_swap_extents(p, span);
3251                 if (nr_extents < 0)              2656                 if (nr_extents < 0)
3252                         return nr_extents;       2657                         return nr_extents;
3253                 nr_good_pages = si->pages;    !! 2658                 nr_good_pages = p->pages;
3254         }                                        2659         }
3255         if (!nr_good_pages) {                    2660         if (!nr_good_pages) {
3256                 pr_warn("Empty swap-file\n");    2661                 pr_warn("Empty swap-file\n");
3257                 return -EINVAL;                  2662                 return -EINVAL;
3258         }                                        2663         }
3259                                                  2664 
3260         return nr_extents;                    << 
3261 }                                             << 
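
    [Editor's note: the bad-page pass above validates each header entry
    against last_page, marks it in the swap map, and subtracts it from the
    usable count; the header page itself is always marked bad. A compressed
    userspace model of that bookkeeping -- all values are invented;
    SWAP_MAP_BAD matches the kernel's 0x3f:

    #include <stdio.h>

    #define SWAP_MAP_BAD 0x3f

    int main(void)
    {
            unsigned char swap_map[16] = { 0 };
            unsigned int badpages[] = { 3, 7 }, last_page = 15, maxpages = 16;
            unsigned int nr_good = maxpages - 1;    /* omit the header page */

            for (unsigned int i = 0; i < 2; i++) {
                    unsigned int page_nr = badpages[i];

                    if (page_nr == 0 || page_nr > last_page)
                            return 1;               /* corrupt header */
                    if (page_nr < maxpages) {
                            swap_map[page_nr] = SWAP_MAP_BAD;
                            nr_good--;
                    }
            }
            swap_map[0] = SWAP_MAP_BAD;             /* header page itself */
            printf("good pages: %u\n", nr_good);    /* prints 13 */
            return 0;
    }
    ]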
3262                                               << 
3263 static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,  <<
3264                                                 union swap_header *swap_header,  <<
3265                                                 unsigned long maxpages)  <<
3266 {                                             << 
3267         unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);  <<
3268         unsigned long col = si->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS;  <<
3269         struct swap_cluster_info *cluster_info;  <<
3270         unsigned long i, j, k, idx;           << 
3271         int cpu, err = -ENOMEM;               << 
3272                                               << 
3273         cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL);  <<
3274         if (!cluster_info)                       2665         if (!cluster_info)
3275                 goto err;                     !! 2666                 return nr_extents;
3276                                               << 
3277         for (i = 0; i < nr_clusters; i++)     << 
3278                 spin_lock_init(&cluster_info[i].lock);  <<
3279                                               << 
3280         si->cluster_next_cpu = alloc_percpu(unsigned int);  <<
3281         if (!si->cluster_next_cpu)            << 
3282                 goto err_free;                << 
3283                                                  2667 
3284         /* Random start position to help with wear leveling */  <<
3285         for_each_possible_cpu(cpu)            << 
3286                 per_cpu(*si->cluster_next_cpu, cpu) =  <<
3287                 get_random_u32_inclusive(1, si->highest_bit);  <<
3288                                               << 
3289         si->percpu_cluster = alloc_percpu(struct percpu_cluster);  <<
3290         if (!si->percpu_cluster)              << 
3291                 goto err_free;                << 
3292                                               << 
3293         for_each_possible_cpu(cpu) {          << 
3294                 struct percpu_cluster *cluster;  <<
3295                                               << 
3296                 cluster = per_cpu_ptr(si->percpu_cluster, cpu);  <<
3297                 for (i = 0; i < SWAP_NR_ORDERS; i++)  <<
3298                         cluster->next[i] = SWAP_NEXT_INVALID;  <<
3299         }                                     << 
3300                                               << 
3301         /*                                    << 
3302          * Mark unusable pages as unavailable. The clusters aren't  <<
3303          * marked free yet, so no list operations are involved yet.  <<
3304          *                                    <<
3305          * See setup_swap_map_and_extents(): header page, bad pages,  <<
3306          * and the EOF part of the last cluster are also marked unusable.  <<
3307          */                                   <<
3308         inc_cluster_info_page(si, cluster_info, 0);  <<
3309         for (i = 0; i < swap_header->info.nr_badpages; i++)  <<
3310                 inc_cluster_info_page(si, cluster_info,  <<
3311                                       swap_header->info.badpages[i]);  <<
3312         for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++)  <<
3313                 inc_cluster_info_page(si, cluster_info, i);  <<
3314                                               << 
3315         INIT_LIST_HEAD(&si->free_clusters);   << 
3316         INIT_LIST_HEAD(&si->full_clusters);   << 
3317         INIT_LIST_HEAD(&si->discard_clusters);  <<
3318                                               << 
3319         for (i = 0; i < SWAP_NR_ORDERS; i++) {  <<
3320                 INIT_LIST_HEAD(&si->nonfull_clusters[i]);  <<
3321                 INIT_LIST_HEAD(&si->frag_clusters[i]);  <<
3322                 si->frag_cluster_nr[i] = 0;   << 
3323         }                                     << 
3324                                                  2668 
3325         /*                                       2669         /*
3326          * Reduce false cache line sharing be    2670          * Reduce false cache line sharing between cluster_info and
3327          * sharing same address space.           2671          * sharing same address space.
3328          */                                      2672          */
3329         for (k = 0; k < SWAP_CLUSTER_COLS; k+    2673         for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
3330                 j = (k + col) % SWAP_CLUSTER_    2674                 j = (k + col) % SWAP_CLUSTER_COLS;
3331                 for (i = 0; i < DIV_ROUND_UP(    2675                 for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
3332                         struct swap_cluster_info *ci;  <<
3333                         idx = i * SWAP_CLUSTE    2676                         idx = i * SWAP_CLUSTER_COLS + j;
3334                         ci = cluster_info + idx;  <<
3335                         if (idx >= nr_cluster    2677                         if (idx >= nr_clusters)
3336                                 continue;        2678                                 continue;
3337                         if (ci->count) {      !! 2679                         if (cluster_count(&cluster_info[idx]))
3338                                 ci->flags = CLUSTER_FLAG_NONFULL;  <<
3339                                 list_add_tail(&ci->list, &si->nonfull_clusters[0]);  <<
3340                                 continue;        2680                                 continue;
3341                         }                     !! 2681                         cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
3342                         ci->flags = CLUSTER_FLAG_FREE;  !! 2681                         cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
3343                         list_add_tail(&ci->list, &si->free_clusters);  !! 2682                         cluster_list_add_tail(&p->free_clusters, cluster_info,
3344                 }                                2684                 }
3345         }                                        2685         }
                                                   >> 2686         return nr_extents;
                                                   >> 2687 }
3346                                                  2688 
3347         return cluster_info;                  !! 2689 /*
                                                   >> 2690  * Helper to sys_swapon determining if a given swap
                                                   >> 2691  * backing device queue supports DISCARD operations.
                                                   >> 2692  */
                                                   >> 2693 static bool swap_discardable(struct swap_info_struct *si)
                                                   >> 2694 {
                                                   >> 2695         struct request_queue *q = bdev_get_queue(si->bdev);
3348                                                  2696 
3349 err_free:                                     !! 2697         if (!q || !blk_queue_discard(q))
3350         kvfree(cluster_info);                 !! 2698                 return false;
3351 err:                                          !! 2699 
3352         return ERR_PTR(err);                  !! 2700         return true;
3353 }                                                2701 }
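
    [Editor's note: the final loop in setup_clusters() walks clusters column
    by column, starting at the column holding cluster_next, so consecutive
    free-list entries come from different cache lines. The visiting order
    with small invented numbers -- COLS and the cluster count are made up
    for illustration:

    #include <stdio.h>

    #define COLS 4

    int main(void)
    {
            unsigned long nr_clusters = 10, col = 1, i, j, k, idx;

            for (k = 0; k < COLS; k++) {
                    j = (k + col) % COLS;
                    for (i = 0; i < (nr_clusters + COLS - 1) / COLS; i++) {
                            idx = i * COLS + j;
                            if (idx >= nr_clusters)
                                    continue;
                            printf("%lu ", idx);    /* free-list insertion order */
                    }
            }
            printf("\n");   /* prints: 1 5 9 2 6 3 7 0 4 8 */
            return 0;
    }
    ]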
3354                                                  2702 
3355 SYSCALL_DEFINE2(swapon, const char __user *,     2703 SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
3356 {                                                2704 {
3357         struct swap_info_struct *si;          !! 2705         struct swap_info_struct *p;
3358         struct filename *name;                   2706         struct filename *name;
3359         struct file *swap_file = NULL;           2707         struct file *swap_file = NULL;
3360         struct address_space *mapping;           2708         struct address_space *mapping;
3361         struct dentry *dentry;                << 
3362         int prio;                                2709         int prio;
3363         int error;                               2710         int error;
3364         union swap_header *swap_header;          2711         union swap_header *swap_header;
3365         int nr_extents;                          2712         int nr_extents;
3366         sector_t span;                           2713         sector_t span;
3367         unsigned long maxpages;                  2714         unsigned long maxpages;
3368         unsigned char *swap_map = NULL;          2715         unsigned char *swap_map = NULL;
3369         unsigned long *zeromap = NULL;        << 
3370         struct swap_cluster_info *cluster_inf    2716         struct swap_cluster_info *cluster_info = NULL;
3371         struct folio *folio = NULL;           !! 2717         unsigned long *frontswap_map = NULL;
                                                   >> 2718         struct page *page = NULL;
3372         struct inode *inode = NULL;              2719         struct inode *inode = NULL;
3373         bool inced_nr_rotate_swap = false;    << 
3374                                                  2720 
3375         if (swap_flags & ~SWAP_FLAGS_VALID)      2721         if (swap_flags & ~SWAP_FLAGS_VALID)
3376                 return -EINVAL;                  2722                 return -EINVAL;
3377                                                  2723 
3378         if (!capable(CAP_SYS_ADMIN))             2724         if (!capable(CAP_SYS_ADMIN))
3379                 return -EPERM;                   2725                 return -EPERM;
3380                                                  2726 
3381         if (!swap_avail_heads)                !! 2727         p = alloc_swap_info();
3382                 return -ENOMEM;               !! 2728         if (IS_ERR(p))
3383                                               !! 2729                 return PTR_ERR(p);
3384         si = alloc_swap_info();               << 
3385         if (IS_ERR(si))                       << 
3386                 return PTR_ERR(si);           << 
3387                                                  2730 
3388         INIT_WORK(&si->discard_work, swap_dis !! 2731         INIT_WORK(&p->discard_work, swap_discard_work);
3389         INIT_WORK(&si->reclaim_work, swap_reclaim_work);  <<
3390                                                  2732 
3391         name = getname(specialfile);             2733         name = getname(specialfile);
3392         if (IS_ERR(name)) {                      2734         if (IS_ERR(name)) {
3393                 error = PTR_ERR(name);           2735                 error = PTR_ERR(name);
3394                 name = NULL;                     2736                 name = NULL;
3395                 goto bad_swap;                   2737                 goto bad_swap;
3396         }                                        2738         }
3397         swap_file = file_open_name(name, O_RD !! 2739         swap_file = file_open_name(name, O_RDWR|O_LARGEFILE, 0);
3398         if (IS_ERR(swap_file)) {                 2740         if (IS_ERR(swap_file)) {
3399                 error = PTR_ERR(swap_file);      2741                 error = PTR_ERR(swap_file);
3400                 swap_file = NULL;                2742                 swap_file = NULL;
3401                 goto bad_swap;                   2743                 goto bad_swap;
3402         }                                        2744         }
3403                                                  2745 
3404         si->swap_file = swap_file;            !! 2746         p->swap_file = swap_file;
3405         mapping = swap_file->f_mapping;          2747         mapping = swap_file->f_mapping;
3406         dentry = swap_file->f_path.dentry;    << 
3407         inode = mapping->host;                   2748         inode = mapping->host;
3408                                                  2749 
3409         error = claim_swapfile(si, inode);    !! 2750         /* If S_ISREG(inode->i_mode) will do inode_lock(inode); */
                                                   >> 2751         error = claim_swapfile(p, inode);
3410         if (unlikely(error))                     2752         if (unlikely(error))
3411                 goto bad_swap;                   2753                 goto bad_swap;
3412                                                  2754 
3413         inode_lock(inode);                    << 
3414         if (d_unlinked(dentry) || cant_mount(dentry)) {  <<
3415                 error = -ENOENT;              << 
3416                 goto bad_swap_unlock_inode;   << 
3417         }                                     << 
3418         if (IS_SWAPFILE(inode)) {             << 
3419                 error = -EBUSY;               << 
3420                 goto bad_swap_unlock_inode;   << 
3421         }                                     << 
3422                                               << 
3423         /*                                       2755         /*
3424          * Read the swap header.                 2756          * Read the swap header.
3425          */                                      2757          */
3426         if (!mapping->a_ops->read_folio) {    !! 2758         if (!mapping->a_ops->readpage) {
3427                 error = -EINVAL;                 2759                 error = -EINVAL;
3428                 goto bad_swap_unlock_inode;   !! 2760                 goto bad_swap;
3429         }                                        2761         }
3430         folio = read_mapping_folio(mapping, 0, swap_file);  !! 2762         page = read_mapping_page(mapping, 0, swap_file);
3431         if (IS_ERR(folio)) {                  !! 2763         if (IS_ERR(page)) {
3432                 error = PTR_ERR(folio);       !! 2764                 error = PTR_ERR(page);
3433                 goto bad_swap_unlock_inode;   !! 2765                 goto bad_swap;
3434         }                                        2766         }
3435         swap_header = kmap_local_folio(folio, 0);  !! 2767         swap_header = kmap(page);
3436                                                  2768 
3437         maxpages = read_swap_header(si, swap_header, inode);  !! 2769         maxpages = read_swap_header(p, swap_header, inode);
3438         if (unlikely(!maxpages)) {               2770         if (unlikely(!maxpages)) {
3439                 error = -EINVAL;                 2771                 error = -EINVAL;
3440                 goto bad_swap_unlock_inode;   !! 2772                 goto bad_swap;
3441         }                                        2773         }
3442                                                  2774 
3443         /* OK, set up the swap map and apply     2775         /* OK, set up the swap map and apply the bad block list */
3444         swap_map = vzalloc(maxpages);            2776         swap_map = vzalloc(maxpages);
3445         if (!swap_map) {                         2777         if (!swap_map) {
3446                 error = -ENOMEM;                 2778                 error = -ENOMEM;
3447                 goto bad_swap_unlock_inode;   !! 2779                 goto bad_swap;
3448         }                                        2780         }
3449                                                  2781 
3450         error = swap_cgroup_swapon(si->type, maxpages);  !! 2782         if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
3451         if (error)                            !! 2783                 p->flags |= SWP_STABLE_WRITES;
3452                 goto bad_swap_unlock_inode;   << 
3453                                               << 
3454         nr_extents = setup_swap_map_and_extents(si, swap_header, swap_map,  <<
3455                                                 maxpages, &span);  <<
3456         if (unlikely(nr_extents < 0)) {       << 
3457                 error = nr_extents;           << 
3458                 goto bad_swap_unlock_inode;   << 
3459         }                                     << 
3460                                                  2784 
3461         /*                                    !! 2785         if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
3462          * Use kvmalloc_array instead of bitmap_zalloc as the allocation order might  <<
3463          * be above MAX_PAGE_ORDER in case of a large swap file.  <<
3464          */                                   <<
3465         zeromap = kvmalloc_array(BITS_TO_LONGS(maxpages), sizeof(long),  <<
3466                                     GFP_KERNEL | __GFP_ZERO);  <<
3467         if (!zeromap) {                       << 
3468                 error = -ENOMEM;              << 
3469                 goto bad_swap_unlock_inode;   << 
3470         }                                     << 
3471                                                  2788 
3472         if (si->bdev && bdev_stable_writes(si->bdev))  !! 2789                 p->flags |= SWP_SOLIDSTATE;
3473                 si->flags |= SWP_STABLE_WRITES;  !! 2790                 /*
                                                   >> 2791                  * select a random position to start with to help wear leveling
                                                   >> 2792                  * SSD
                                                   >> 2793                  */
                                                   >> 2794                 p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
                                                   >> 2795                 nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
3474                                                  2796 
3475         if (si->bdev && bdev_synchronous(si->bdev))  !! 2797                 cluster_info = kvzalloc(nr_cluster * sizeof(*cluster_info),
3476                 si->flags |= SWP_SYNCHRONOUS_IO;  !! 2798                                         GFP_KERNEL);
                                                   >> 2799                 if (!cluster_info) {
                                                   >> 2800                         error = -ENOMEM;
                                                   >> 2801                         goto bad_swap;
                                                   >> 2802                 }
3477                                                  2803 
3478         if (si->bdev && bdev_nonrot(si->bdev)) {  !! 2804                 for (ci = 0; ci < nr_cluster; ci++)
3479                 si->flags |= SWP_SOLIDSTATE;  !! 2805                         spin_lock_init(&((cluster_info + ci)->lock));
3480                                                  2806 
3481                 cluster_info = setup_clusters(si, swap_header, maxpages);  !! 2807                 p->percpu_cluster = alloc_percpu(struct percpu_cluster);
3482                 if (IS_ERR(cluster_info)) {  !! 2808                 if (!p->percpu_cluster) {
3483                         error = PTR_ERR(cluster_info);  !! 2809                         error = -ENOMEM;
3484                         cluster_info = NULL;  !! 2810                         goto bad_swap;
3485                         goto bad_swap_unlock_inode;  <<
3486                 }                                2811                 }
3487         } else {                              !! 2812                 for_each_possible_cpu(cpu) {
3488                 atomic_inc(&nr_rotate_swap);  !! 2813                         struct percpu_cluster *cluster;
3489                 inced_nr_rotate_swap = true;  !! 2814                         cluster = per_cpu_ptr(p->percpu_cluster, cpu);
                                                   >> 2815                         cluster_set_null(&cluster->index);
                                                   >> 2816                 }
                                                   >> 2817         }
                                                   >> 2818 
                                                   >> 2819         error = swap_cgroup_swapon(p->type, maxpages);
                                                   >> 2820         if (error)
                                                   >> 2821                 goto bad_swap;
                                                   >> 2822 
                                                   >> 2823         nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
                                                   >> 2824                 cluster_info, maxpages, &span);
                                                   >> 2825         if (unlikely(nr_extents < 0)) {
                                                   >> 2826                 error = nr_extents;
                                                   >> 2827                 goto bad_swap;
3490         }                                        2828         }
                                                   >> 2829         /* frontswap enabled? set up bit-per-page map for frontswap */
                                                   >> 2830         if (IS_ENABLED(CONFIG_FRONTSWAP))
                                                   >> 2831                 frontswap_map = kvzalloc(BITS_TO_LONGS(maxpages) * sizeof(long),
                                                   >> 2832                                          GFP_KERNEL);
3491                                                  2833 
3492         if ((swap_flags & SWAP_FLAG_DISCARD) &&  !! 2834         if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
3493             si->bdev && bdev_max_discard_sectors(si->bdev)) {  <<
3494                 /*                               2835                 /*
3495                  * When discard is enabled fo    2836                  * When discard is enabled for swap with no particular
3496                  * policy flagged, we set all    2837                  * policy flagged, we set all swap discard flags here in
3497                  * order to sustain backward     2838                  * order to sustain backward compatibility with older
3498                  * swapon(8) releases.           2839                  * swapon(8) releases.
3499                  */                              2840                  */
3500                 si->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD |  !! 2841                 p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD |
3501                              SWP_PAGE_DISCARD    2842                              SWP_PAGE_DISCARD);
3502                                                  2843 
3503                 /*                               2844                 /*
3504                  * By flagging sys_swapon, a     2845                  * By flagging sys_swapon, a sysadmin can tell us to
3505                  * either do single-time area    2846                  * either do single-time area discards only, or to just
3506                  * perform discards for relea    2847                  * perform discards for released swap page-clusters.
3507                  * Now it's time to adjust th    2848                  * Now it's time to adjust the p->flags accordingly.
3508                  */                              2849                  */
3509                 if (swap_flags & SWAP_FLAG_DI    2850                 if (swap_flags & SWAP_FLAG_DISCARD_ONCE)
3510                         si->flags &= ~SWP_PAG !! 2851                         p->flags &= ~SWP_PAGE_DISCARD;
3511                 else if (swap_flags & SWAP_FL    2852                 else if (swap_flags & SWAP_FLAG_DISCARD_PAGES)
3512                         si->flags &= ~SWP_ARE !! 2853                         p->flags &= ~SWP_AREA_DISCARD;
3513                                                  2854 
3514                 /* issue a swapon-time discar    2855                 /* issue a swapon-time discard if it's still required */
3515                 if (si->flags & SWP_AREA_DISC !! 2856                 if (p->flags & SWP_AREA_DISCARD) {
3516                         int err = discard_swa !! 2857                         int err = discard_swap(p);
3517                         if (unlikely(err))       2858                         if (unlikely(err))
3518                                 pr_err("swapo    2859                                 pr_err("swapon: discard_swap(%p): %d\n",
3519                                         si, e !! 2860                                         p, err);
3520                 }                                2861                 }
3521         }                                        2862         }
3522                                                  2863 
3523         error = init_swap_address_space(si->t !! 2864         error = init_swap_address_space(p->type, maxpages);
3524         if (error)                            << 
3525                 goto bad_swap_unlock_inode;   << 
3526                                               << 
3527         error = zswap_swapon(si->type, maxpages);  << 
3528         if (error)                               2865         if (error)
3529                 goto free_swap_address_space; !! 2866                 goto bad_swap;
3530                                               << 
3531         /*                                    << 
3532          * Flush any pending IO and dirty mappings before we start using this  << 
3533          * swap device.                       << 
3534          */                                   << 
3535         inode->i_flags |= S_SWAPFILE;         << 
3536         error = inode_drain_writes(inode);    << 
3537         if (error) {                          << 
3538                 inode->i_flags &= ~S_SWAPFILE;  << 
3539                 goto free_swap_zswap;         << 
3540         }                                     << 
3541                                                  2867 
3542         mutex_lock(&swapon_mutex);               2868         mutex_lock(&swapon_mutex);
3543         prio = -1;                               2869         prio = -1;
3544         if (swap_flags & SWAP_FLAG_PREFER)       2870         if (swap_flags & SWAP_FLAG_PREFER)
3545                 prio =                           2871                 prio =
3546                   (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;    2872                   (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
3547         enable_swap_info(si, prio, swap_map, cluster_info, zeromap);  !! 2873         enable_swap_info(p, prio, swap_map, cluster_info, frontswap_map);
3548                                                  2874 
3549         pr_info("Adding %uk swap on %s.  Priority:%d extents:%d across:%lluk %s%s%s%s\n",  !! 2875         pr_info("Adding %uk swap on %s.  Priority:%d extents:%d across:%lluk %s%s%s%s%s\n",
3550                 K(si->pages), name->name, si->prio, nr_extents,  !! 2876                 p->pages<<(PAGE_SHIFT-10), name->name, p->prio,
3551                 K((unsigned long long)span),  !! 2877                 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
3552                 (si->flags & SWP_SOLIDSTATE) ? "SS" : "",  !! 2878                 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
3553                 (si->flags & SWP_DISCARDABLE) ? "D" : "",  !! 2879                 (p->flags & SWP_DISCARDABLE) ? "D" : "",
3554                 (si->flags & SWP_AREA_DISCARD) ? "s" : "",  !! 2880                 (p->flags & SWP_AREA_DISCARD) ? "s" : "",
3555                 (si->flags & SWP_PAGE_DISCARD) ? "c" : "");  !! 2881                 (p->flags & SWP_PAGE_DISCARD) ? "c" : "",
                                                   >> 2882                 (frontswap_map) ? "FS" : "");
3556                                                  2883 
3557         mutex_unlock(&swapon_mutex);             2884         mutex_unlock(&swapon_mutex);
3558         atomic_inc(&proc_poll_event);            2885         atomic_inc(&proc_poll_event);
3559         wake_up_interruptible(&proc_poll_wait);    2886         wake_up_interruptible(&proc_poll_wait);
3560                                                  2887 
                                                   >> 2888         if (S_ISREG(inode->i_mode))
                                                   >> 2889                 inode->i_flags |= S_SWAPFILE;
3561         error = 0;                               2890         error = 0;
3562         goto out;                                2891         goto out;
3563 free_swap_zswap:                              << 
3564         zswap_swapoff(si->type);              << 
3565 free_swap_address_space:                      << 
3566         exit_swap_address_space(si->type);    << 
3567 bad_swap_unlock_inode:                        << 
3568         inode_unlock(inode);                  << 
3569 bad_swap:                                        2892 bad_swap:
3570         free_percpu(si->percpu_cluster);      !! 2893         free_percpu(p->percpu_cluster);
3571         si->percpu_cluster = NULL;            !! 2894         p->percpu_cluster = NULL;
3572         free_percpu(si->cluster_next_cpu);    !! 2895         if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
3573         si->cluster_next_cpu = NULL;          !! 2896                 set_blocksize(p->bdev, p->old_block_size);
3574         inode = NULL;                         !! 2897                 blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3575         destroy_swap_extents(si);             !! 2898         }
3576         swap_cgroup_swapoff(si->type);        !! 2899         destroy_swap_extents(p);
                                                   >> 2900         swap_cgroup_swapoff(p->type);
3577         spin_lock(&swap_lock);                   2901         spin_lock(&swap_lock);
3578         si->swap_file = NULL;                 !! 2902         p->swap_file = NULL;
3579         si->flags = 0;                        !! 2903         p->flags = 0;
3580         spin_unlock(&swap_lock);                 2904         spin_unlock(&swap_lock);
3581         vfree(swap_map);                         2905         vfree(swap_map);
3582         kvfree(zeromap);                      << 
3583         kvfree(cluster_info);                    2906         kvfree(cluster_info);
3584         if (inced_nr_rotate_swap)             !! 2907         kvfree(frontswap_map);
3585                 atomic_dec(&nr_rotate_swap);  !! 2908         if (swap_file) {
3586         if (swap_file)                        !! 2909                 if (inode && S_ISREG(inode->i_mode)) {
                                                   >> 2910                         inode_unlock(inode);
                                                   >> 2911                         inode = NULL;
                                                   >> 2912                 }
3587                 filp_close(swap_file, NULL);     2913                 filp_close(swap_file, NULL);
                                                   >> 2914         }
3588 out:                                             2915 out:
3589         if (!IS_ERR_OR_NULL(folio))           !! 2916         if (page && !IS_ERR(page)) {
3590                 folio_release_kmap(folio, swap_header);  !! 2917                 kunmap(page);
                                                   >> 2918                 put_page(page);
                                                   >> 2919         }
3591         if (name)                                2920         if (name)
3592                 putname(name);                   2921                 putname(name);
3593         if (inode)                            !! 2922         if (inode && S_ISREG(inode->i_mode))
3594                 inode_unlock(inode);             2923                 inode_unlock(inode);
3595         if (!error)                              2924         if (!error)
3596                 enable_swap_slots_cache();       2925                 enable_swap_slots_cache();
3597         return error;                            2926         return error;
3598 }                                                2927 }
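
For reference, the swap_flags decoded by the function above are composed by
the caller of swapon(2). A minimal user-space sketch, assuming glibc's
<sys/swap.h> wrapper; the device path /dev/sdb2 is a placeholder:

    /* Request priority 10 and discard support; needs CAP_SYS_ADMIN. */
    #include <stdio.h>
    #include <sys/swap.h>

    int main(void)
    {
            /* priority lives in the low bits, selected by SWAP_FLAG_PREFER */
            int flags = SWAP_FLAG_PREFER | (10 << SWAP_FLAG_PRIO_SHIFT);

            /* plain SWAP_FLAG_DISCARD: the kernel branch above then sets
             * both SWP_AREA_DISCARD and SWP_PAGE_DISCARD */
            flags |= SWAP_FLAG_DISCARD;

            if (swapon("/dev/sdb2", flags) != 0) {  /* placeholder path */
                    perror("swapon");
                    return 1;
            }
            return 0;
    }
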
3599                                                  2928 
3600 void si_swapinfo(struct sysinfo *val)            2929 void si_swapinfo(struct sysinfo *val)
3601 {                                                2930 {
3602         unsigned int type;                       2931         unsigned int type;
3603         unsigned long nr_to_be_unused = 0;       2932         unsigned long nr_to_be_unused = 0;
3604                                                  2933 
3605         spin_lock(&swap_lock);                   2934         spin_lock(&swap_lock);
3606         for (type = 0; type < nr_swapfiles; type++) {    2935         for (type = 0; type < nr_swapfiles; type++) {
3607                 struct swap_info_struct *si = swap_info[type];    2936                 struct swap_info_struct *si = swap_info[type];
3608                                                     2937 
3609                 if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK))    2938                 if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK))
3610                         nr_to_be_unused += READ_ONCE(si->inuse_pages);  !! 2939                         nr_to_be_unused += si->inuse_pages;
3611         }                                           2940         }
3612         val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;    2941         val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;
3613         val->totalswap = total_swap_pages + nr_to_be_unused;    2942         val->totalswap = total_swap_pages + nr_to_be_unused;
3614         spin_unlock(&swap_lock);                 2943         spin_unlock(&swap_lock);
3615 }                                                2944 }
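
The counters si_swapinfo() fills here are what sysinfo(2) reports back to
user space. A minimal sketch using only the standard struct sysinfo fields
(totalswap, freeswap, mem_unit):

    #include <stdio.h>
    #include <sys/sysinfo.h>

    int main(void)
    {
            struct sysinfo si;

            if (sysinfo(&si) != 0) {
                    perror("sysinfo");
                    return 1;
            }
            /* counts are in units of mem_unit bytes */
            printf("total swap: %llu kB\n",
                   (unsigned long long)si.totalswap * si.mem_unit / 1024);
            printf("free swap:  %llu kB\n",
                   (unsigned long long)si.freeswap * si.mem_unit / 1024);
            return 0;
    }
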
3616                                                  2945 
3617 /*                                               2946 /*
3618  * Verify that nr swap entries are valid and increment their swap map counts.  !! 2947  * Verify that a swap entry is valid and increment its swap map count.
3619  *                                               2948  *
3620  * Returns error code in following case.         2949  * Returns error code in following case.
3621  * - success -> 0                                2950  * - success -> 0
3622  * - swp_entry is invalid -> EINVAL              2951  * - swp_entry is invalid -> EINVAL
3623  * - swp_entry is migration entry -> EINVAL      2952  * - swp_entry is migration entry -> EINVAL
3624  * - swap-cache reference is requested but there is already one. -> EEXIST    2953  * - swap-cache reference is requested but there is already one. -> EEXIST
3625  * - swap-cache reference is requested but the entry is not used. -> ENOENT    2954  * - swap-cache reference is requested but the entry is not used. -> ENOENT
3626  * - swap-mapped reference requested but needs continued swap count. -> ENOMEM    2955  * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
3627  */                                              2956  */
3628 static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)  !! 2957 static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
3629 {                                                2958 {
3630         struct swap_info_struct *si;          !! 2959         struct swap_info_struct *p;
3631         struct swap_cluster_info *ci;            2960         struct swap_cluster_info *ci;
3632         unsigned long offset;                 !! 2961         unsigned long offset, type;
3633         unsigned char count;                     2962         unsigned char count;
3634         unsigned char has_cache;                 2963         unsigned char has_cache;
3635         int err, i;                           !! 2964         int err = -EINVAL;
3636                                                  2965 
3637         si = swp_swap_info(entry);            !! 2966         if (non_swap_entry(entry))
                                                   >> 2967                 goto out;
3638                                                  2968 
                                                   >> 2969         type = swp_type(entry);
                                                   >> 2970         if (type >= nr_swapfiles)
                                                   >> 2971                 goto bad_file;
                                                   >> 2972         p = swap_info[type];
3639         offset = swp_offset(entry);              2973         offset = swp_offset(entry);
3640         VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);  !! 2974         if (unlikely(offset >= p->max))
3641         VM_WARN_ON(usage == 1 && nr > 1);  !! 2975                 goto out;
3642         ci = lock_cluster_or_swap_info(si, offset);  << 
3643                                               << 
3644         err = 0;                              << 
3645         for (i = 0; i < nr; i++) {            << 
3646                 count = si->swap_map[offset + i];  << 
3647                                               << 
3648                 /*                            << 
3649                  * swapin_readahead() doesn't check if a swap entry is valid, so the  << 
3650                  * swap entry could be SWAP_MAP_BAD. Check here with lock held.  << 
3651                  */                           << 
3652                 if (unlikely(swap_count(count) == SWAP_MAP_BAD)) {  << 
3653                         err = -ENOENT;        << 
3654                         goto unlock_out;      << 
3655                 }                             << 
3656                                                  2976 
3657                 has_cache = count & SWAP_HAS_CACHE;  << 
3658                 count &= ~SWAP_HAS_CACHE;     << 
3659                                                  2978 
3660                 if (!count && !has_cache) {   !! 2979         count = p->swap_map[offset];
3661                         err = -ENOENT;        << 
3662                 } else if (usage == SWAP_HAS_CACHE) {  << 
3663                         if (has_cache)        << 
3664                                 err = -EEXIST;  << 
3665                 } else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX) {  << 
3666                         err = -EINVAL;        << 
3667                 }                             << 
3668                                                  2980 
3669                 if (err)                      !! 2981         /*
3670                         goto unlock_out;      !! 2982          * swapin_readahead() doesn't check if a swap entry is valid, so the
                                                   >> 2983          * swap entry could be SWAP_MAP_BAD. Check here with lock held.
                                                   >> 2984          */
                                                   >> 2985         if (unlikely(swap_count(count) == SWAP_MAP_BAD)) {
                                                   >> 2986                 err = -ENOENT;
                                                   >> 2987                 goto unlock_out;
3671         }                                        2988         }
3672                                                  2989 
3673         for (i = 0; i < nr; i++) {            !! 2990         has_cache = count & SWAP_HAS_CACHE;
3674                 count = si->swap_map[offset + i];  !! 2991         count &= ~SWAP_HAS_CACHE;
3675                 has_cache = count & SWAP_HAS_CACHE;  !! 2992         err = 0;
3676                 count &= ~SWAP_HAS_CACHE;     << 
3677                                                  2993 
3678                 if (usage == SWAP_HAS_CACHE)  !! 2994         if (usage == SWAP_HAS_CACHE) {
                                                   >> 2995 
                                                   >> 2996                 /* set SWAP_HAS_CACHE if there is no cache and entry is used */
                                                   >> 2997                 if (!has_cache && count)
3679                         has_cache = SWAP_HAS_CACHE;    2998                         has_cache = SWAP_HAS_CACHE;
3680                 else if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)  !! 2999                 else if (has_cache)             /* someone else added cache */
                                                   >> 3000                         err = -EEXIST;
                                                   >> 3001                 else                            /* no users remaining */
                                                   >> 3002                         err = -ENOENT;
                                                   >> 3003 
                                                   >> 3004         } else if (count || has_cache) {
                                                   >> 3005 
                                                   >> 3006                 if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
3681                         count += usage;          3007                         count += usage;
3682                 else if (swap_count_continued(si, offset + i, count))  !! 3008                 else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX)
                                                   >> 3009                         err = -EINVAL;
                                                   >> 3010                 else if (swap_count_continued(p, offset, count))
3683                         count = COUNT_CONTINUED;    3011                         count = COUNT_CONTINUED;
3684                 else {                        !! 3012                 else
3685                         /*                    << 
3686                          * Don't need to rollback changes, because if  << 
3687                          * usage == 1, there must be nr == 1.  << 
3688                          */                   << 
3689                         err = -ENOMEM;           3013                         err = -ENOMEM;
3690                         goto unlock_out;      !! 3014         } else
3691                 }                             !! 3015                 err = -ENOENT;                  /* unused swap entry */
3692                                                  3016 
3693                 WRITE_ONCE(si->swap_map[offset + i], count | has_cache);  !! 3017         p->swap_map[offset] = count | has_cache;
3694         }                                     << 
3695                                                  3018 
3696 unlock_out:                                      3019 unlock_out:
3697         unlock_cluster_or_swap_info(si, ci);  !! 3020         unlock_cluster_or_swap_info(p, ci);
                                                   >> 3021 out:
3698         return err;                              3022         return err;
                                                   >> 3023 
                                                   >> 3024 bad_file:
                                                   >> 3025         pr_err("swap_dup: %s%08lx\n", Bad_file, entry.val);
                                                   >> 3026         goto out;
3699 }                                                3027 }
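
A stand-alone model of the swap_map byte that __swap_duplicate() inspects;
the constants mirror include/linux/swap.h, but the program itself is only
illustrative, not kernel code:

    #include <assert.h>
    #include <stdio.h>

    #define SWAP_HAS_CACHE  0x40    /* entry also held by the swap cache */
    #define COUNT_CONTINUED 0x80    /* count continued in extra page */
    #define SWAP_MAP_MAX    0x3e    /* max in-place reference count */
    #define SWAP_MAP_BAD    0x3f    /* marks a bad page slot */

    int main(void)
    {
            unsigned char map = 0;

            map += 1;                       /* one pte references the entry */
            map |= SWAP_HAS_CACHE;          /* and it sits in the swap cache */

            unsigned char count = map & ~SWAP_HAS_CACHE;  /* cf. swap_count() */
            assert(count == 1 && (map & SWAP_HAS_CACHE));

            /* a second SWAP_HAS_CACHE request would now fail with -EEXIST */
            printf("count=%u cached=%d\n", (unsigned)count,
                   !!(map & SWAP_HAS_CACHE));
            return 0;
    }
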
3700                                                  3028 
3701 /*                                               3029 /*
3702  * Help swapoff by noting that swap entry belongs to shmem/tmpfs    3030  * Help swapoff by noting that swap entry belongs to shmem/tmpfs
3703  * (in which case its reference count is never incremented).    3031  * (in which case its reference count is never incremented).
3704  */                                              3032  */
3705 void swap_shmem_alloc(swp_entry_t entry, int nr)  !! 3033 void swap_shmem_alloc(swp_entry_t entry)
3706 {                                                3034 {
3707         __swap_duplicate(entry, SWAP_MAP_SHMEM, nr);  !! 3035         __swap_duplicate(entry, SWAP_MAP_SHMEM);
3708 }                                                3036 }
3709                                                  3037 
3710 /*                                               3038 /*
3711  * Increase reference count of swap entry by 1.    3039  * Increase reference count of swap entry by 1.
3712  * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required    3040  * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required
3713  * but could not be atomically allocated.  Returns 0, just as if it succeeded,    3041  * but could not be atomically allocated.  Returns 0, just as if it succeeded,
3714  * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which    3042  * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which
3715  * might occur if a page table entry has got corrupted.    3043  * might occur if a page table entry has got corrupted.
3716  */                                              3044  */
3717 int swap_duplicate(swp_entry_t entry)            3045 int swap_duplicate(swp_entry_t entry)
3718 {                                                3046 {
3719         int err = 0;                             3047         int err = 0;
3720                                                  3048 
3721         while (!err && __swap_duplicate(entry, 1, 1) == -ENOMEM)  !! 3049         while (!err && __swap_duplicate(entry, 1) == -ENOMEM)
3722                 err = add_swap_count_continuation(entry, GFP_ATOMIC);    3050                 err = add_swap_count_continuation(entry, GFP_ATOMIC);
3723         return err;                              3051         return err;
3724 }                                                3052 }
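
swap_duplicate() only retries the continuation allocation with GFP_ATOMIC;
per the comment above add_swap_count_continuation(), a caller able to drop
its locks retries with GFP_KERNEL instead. A stand-alone model of that
two-phase pattern; all names below are illustrative, not kernel API:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    static bool atomic_pool_empty = true;   /* simulate GFP_ATOMIC failing */

    static void *alloc_page_model(bool may_sleep)
    {
            if (!may_sleep && atomic_pool_empty)
                    return NULL;            /* GFP_ATOMIC may fail fast */
            return malloc(4096);            /* GFP_KERNEL path can reclaim */
    }

    int main(void)
    {
            /* first attempt happens under the "page table lock" */
            void *page = alloc_page_model(false);

            if (!page)                      /* drop locks, then retry */
                    page = alloc_page_model(true);

            printf("continuation page %s\n", page ? "allocated" : "failed");
            free(page);
            return 0;
    }
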
3725                                                  3053 
3726 /*                                               3054 /*
3727  * @entry: first swap entry from which we allocate nr swap cache.  !! 3055  * @entry: swap entry for which we allocate swap cache.
3728  *                                               3056  *
3729  * Called when allocating swap cache for existing swap entries,  !! 3057  * Called when allocating swap cache for existing swap entry,
3730  * This can return error codes. Returns 0 at success.    3058  * This can return error codes. Returns 0 at success.
3731  * -EEXIST means there is a swap cache.       !! 3059  * -EBUSY means there is a swap cache.
3732  * Note: return code is different from swap_duplicate().    3060  * Note: return code is different from swap_duplicate().
3733  */                                              3061  */
3734 int swapcache_prepare(swp_entry_t entry, int nr)  !! 3062 int swapcache_prepare(swp_entry_t entry)
3735 {                                                3063 {
3736         return __swap_duplicate(entry, SWAP_HAS_CACHE, nr);  !! 3064         return __swap_duplicate(entry, SWAP_HAS_CACHE);
3737 }                                                3065 }
3738                                                  3066 
3739 void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)  !! 3067 struct swap_info_struct *page_swap_info(struct page *page)
3740 {                                                3068 {
3741         unsigned long offset = swp_offset(entry);  !! 3069         swp_entry_t swap = { .val = page_private(page) };
3742                                               !! 3070         return swap_info[swp_type(swap)];
3743         cluster_swap_free_nr(si, offset, nr, SWAP_HAS_CACHE);  << 
3744 }                                             << 
3745                                               << 
3746 struct swap_info_struct *swp_swap_info(swp_entry_t entry)  << 
3747 {                                             << 
3748         return swap_type_to_swap_info(swp_type(entry));  << 
3749 }                                                3071 }
3750                                                  3072 
3751 /*                                               3073 /*
3752  * out-of-line methods to avoid include hell. !! 3074  * out-of-line __page_file_ methods to avoid include hell.
3753  */                                              3075  */
3754 struct address_space *swapcache_mapping(struct folio *folio)  !! 3076 struct address_space *__page_file_mapping(struct page *page)
3755 {                                                3077 {
3756         return swp_swap_info(folio->swap)->swap_file->f_mapping;  !! 3078         VM_BUG_ON_PAGE(!PageSwapCache(page), page);
                                                   >> 3079         return page_swap_info(page)->swap_file->f_mapping;
3757 }                                                3080 }
3758 EXPORT_SYMBOL_GPL(swapcache_mapping);         !! 3081 EXPORT_SYMBOL_GPL(__page_file_mapping);
3759                                                  3082 
3760 pgoff_t __folio_swap_cache_index(struct folio *folio)  !! 3083 pgoff_t __page_file_index(struct page *page)
3761 {                                                3084 {
3762         return swap_cache_index(folio->swap); !! 3085         swp_entry_t swap = { .val = page_private(page) };
                                                   >> 3086         VM_BUG_ON_PAGE(!PageSwapCache(page), page);
                                                   >> 3087         return swp_offset(swap);
3763 }                                                3088 }
3764 EXPORT_SYMBOL_GPL(__folio_swap_cache_index);  !! 3089 EXPORT_SYMBOL_GPL(__page_file_index);
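
Both helpers operate on the packed swp_entry_t stored in folio->swap. A
simplified stand-alone model of the type/offset packing; the real layout in
include/linux/swapops.h is arch- and config-dependent, and the 5-bit type
field here is purely illustrative:

    #include <assert.h>
    #include <stdio.h>

    #define TYPE_BITS   5                   /* illustrative width */
    #define OFFSET_BITS (64 - TYPE_BITS)
    #define OFFSET_MASK ((1ULL << OFFSET_BITS) - 1)

    static unsigned long long mk_entry(unsigned type, unsigned long long off)
    {
            return ((unsigned long long)type << OFFSET_BITS) |
                   (off & OFFSET_MASK);
    }

    int main(void)
    {
            unsigned long long e = mk_entry(3, 12345);

            assert(e >> OFFSET_BITS == 3);          /* cf. swp_type() */
            assert((e & OFFSET_MASK) == 12345);     /* cf. swp_offset() */
            printf("type=%llu offset=%llu\n",
                   e >> OFFSET_BITS, e & OFFSET_MASK);
            return 0;
    }
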
3765                                                  3090 
3766 /*                                               3091 /*
3767  * add_swap_count_continuation - called when a swap count is duplicated    3092  * add_swap_count_continuation - called when a swap count is duplicated
3768  * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's    3093  * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
3769  * page of the original vmalloc'ed swap_map, to hold the continuation count    3094  * page of the original vmalloc'ed swap_map, to hold the continuation count
3770  * (for that entry and for its neighbouring PAGE_SIZE swap entries).  Called    3095  * (for that entry and for its neighbouring PAGE_SIZE swap entries).  Called
3771  * again when count is duplicated beyond SWAP_MAP_MAX * SWAP_CONT_MAX, etc.    3096  * again when count is duplicated beyond SWAP_MAP_MAX * SWAP_CONT_MAX, etc.
3772  *                                               3097  *
3773  * These continuation pages are seldom referenced: the common paths all work    3098  * These continuation pages are seldom referenced: the common paths all work
3774  * on the original swap_map, only referring to a continuation page when the    3099  * on the original swap_map, only referring to a continuation page when the
3775  * low "digit" of a count is incremented or decremented through SWAP_MAP_MAX.    3100  * low "digit" of a count is incremented or decremented through SWAP_MAP_MAX.
3776  *                                               3101  *
3777  * add_swap_count_continuation(, GFP_ATOMIC) can be called while holding    3102  * add_swap_count_continuation(, GFP_ATOMIC) can be called while holding
3778  * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL)    3103  * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL)
3779  * can be called after dropping locks.           3104  * can be called after dropping locks.
3780  */                                              3105  */
3781 int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)    3106 int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
3782 {                                                3107 {
3783         struct swap_info_struct *si;             3108         struct swap_info_struct *si;
3784         struct swap_cluster_info *ci;            3109         struct swap_cluster_info *ci;
3785         struct page *head;                       3110         struct page *head;
3786         struct page *page;                       3111         struct page *page;
3787         struct page *list_page;                  3112         struct page *list_page;
3788         pgoff_t offset;                          3113         pgoff_t offset;
3789         unsigned char count;                     3114         unsigned char count;
3790         int ret = 0;                          << 
3791                                                  3115 
3792         /*                                       3116         /*
3793          * When debugging, it's easier to use __GFP_ZERO here; but it's better    3117          * When debugging, it's easier to use __GFP_ZERO here; but it's better
3794          * for latency not to zero a page while GFP_ATOMIC and holding locks.    3118          * for latency not to zero a page while GFP_ATOMIC and holding locks.
3795          */                                      3119          */
3796         page = alloc_page(gfp_mask | __GFP_HIGHMEM);    3120         page = alloc_page(gfp_mask | __GFP_HIGHMEM);
3797                                                  3121 
3798         si = get_swap_device(entry);          !! 3122         si = swap_info_get(entry);
3799         if (!si) {                               3123         if (!si) {
3800                 /*                               3124                 /*
3801                  * An acceptable race has occurred since the failing    3125                  * An acceptable race has occurred since the failing
3802                  * __swap_duplicate(): the swap device may be swapoff  !! 3126                  * __swap_duplicate(): the swap entry has been freed,
                                                   >> 3127                  * perhaps even the whole swap_map cleared for swapoff.
3803                  */                              3128                  */
3804                 goto outer;                      3129                 goto outer;
3805         }                                        3130         }
3806         spin_lock(&si->lock);                 << 
3807                                                  3131 
3808         offset = swp_offset(entry);              3132         offset = swp_offset(entry);
3809                                                  3133 
3810         ci = lock_cluster(si, offset);           3134         ci = lock_cluster(si, offset);
3811                                                  3135 
3812         count = swap_count(si->swap_map[offset]);  !! 3136         count = si->swap_map[offset] & ~SWAP_HAS_CACHE;
3813                                                  3137 
3814         if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {    3138         if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
3815                 /*                               3139                 /*
3816                  * The higher the swap count, the more likely it is that tasks    3140                  * The higher the swap count, the more likely it is that tasks
3817                  * will race to add swap count continuation: we need to avoid    3141                  * will race to add swap count continuation: we need to avoid
3818                  * over-provisioning.            3142                  * over-provisioning.
3819                  */                              3143                  */
3820                 goto out;                        3144                 goto out;
3821         }                                        3145         }
3822                                                  3146 
3823         if (!page) {                             3147         if (!page) {
3824                 ret = -ENOMEM;                !! 3148                 unlock_cluster(ci);
3825                 goto out;                     !! 3149                 spin_unlock(&si->lock);
                                                   >> 3150                 return -ENOMEM;
3826         }                                        3151         }
3827                                                  3152 
                                                   >> 3153         /*
                                                   >> 3154          * We are fortunate that although vmalloc_to_page uses pte_offset_map,
                                                   >> 3155          * no architecture is using highmem pages for kernel page tables: so it
                                                   >> 3156          * will not corrupt the GFP_ATOMIC caller's atomic page table kmaps.
                                                   >> 3157          */
3828         head = vmalloc_to_page(si->swap_map + offset);    3158         head = vmalloc_to_page(si->swap_map + offset);
3829         offset &= ~PAGE_MASK;                    3159         offset &= ~PAGE_MASK;
3830                                                  3160 
3831         spin_lock(&si->cont_lock);            << 
3832         /*                                       3161         /*
3833          * Page allocation does not initialize the page's lru field,    3162          * Page allocation does not initialize the page's lru field,
3834          * but it does always reset its private field.    3163          * but it does always reset its private field.
3835          */                                      3164          */
3836         if (!page_private(head)) {               3165         if (!page_private(head)) {
3837                 BUG_ON(count & COUNT_CONTINUED);    3166                 BUG_ON(count & COUNT_CONTINUED);
3838                 INIT_LIST_HEAD(&head->lru);         3167                 INIT_LIST_HEAD(&head->lru);
3839                 set_page_private(head, SWP_CONTINUED);    3168                 set_page_private(head, SWP_CONTINUED);
3840                 si->flags |= SWP_CONTINUED;      3169                 si->flags |= SWP_CONTINUED;
3841         }                                        3170         }
3842                                                  3171 
3843         list_for_each_entry(list_page, &head->lru, lru) {    3172         list_for_each_entry(list_page, &head->lru, lru) {
3844                 unsigned char *map;              3173                 unsigned char *map;
3845                                                  3174 
3846                 /*                               3175                 /*
3847                  * If the previous map said no continuation, but we've found    3176                  * If the previous map said no continuation, but we've found
3848                  * a continuation page, free our allocation and use this one.    3177                  * a continuation page, free our allocation and use this one.
3849                  */                              3178                  */
3850                 if (!(count & COUNT_CONTINUED))    3179                 if (!(count & COUNT_CONTINUED))
3851                         goto out_unlock_cont; !! 3180                         goto out;
3852                                                  3181 
3853                 map = kmap_local_page(list_page) + offset;  !! 3182                 map = kmap_atomic(list_page) + offset;
3854                 count = *map;                    3183                 count = *map;
3855                 kunmap_local(map);            !! 3184                 kunmap_atomic(map);
3856                                                  3185 
3857                 /*                               3186                 /*
3858                  * If this continuation count now has some space in it,    3187                  * If this continuation count now has some space in it,
3859                  * free our allocation and use this one.    3188                  * free our allocation and use this one.
3860                  */                              3189                  */
3861                 if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)    3190                 if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)
3862                         goto out_unlock_cont; !! 3191                         goto out;
3863         }                                        3192         }
3864                                                  3193 
3865         list_add_tail(&page->lru, &head->lru);    3194         list_add_tail(&page->lru, &head->lru);
3866         page = NULL;                    /* now it's attached, don't free it */    3195         page = NULL;                    /* now it's attached, don't free it */
3867 out_unlock_cont:                              << 
3868         spin_unlock(&si->cont_lock);          << 
3869 out:                                             3196 out:
3870         unlock_cluster(ci);                      3197         unlock_cluster(ci);
3871         spin_unlock(&si->lock);                  3198         spin_unlock(&si->lock);
3872         put_swap_device(si);                  << 
3873 outer:                                           3199 outer:
3874         if (page)                                3200         if (page)
3875                 __free_page(page);               3201                 __free_page(page);
3876         return ret;                           !! 3202         return 0;
3877 }                                                3203 }
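
The vmalloc_to_page()/offset arithmetic above locates the continuation
chain: the entry's offset selects which page of the vmalloc'ed swap_map
holds its count byte, and the low bits select the byte within that page. A
stand-alone model of that split, assuming 4096-byte pages:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    int main(void)
    {
            unsigned long offset = 123456;  /* byte index into swap_map */

            unsigned long map_page = offset / PAGE_SIZE; /* vmalloc_to_page() */
            unsigned long in_page  = offset % PAGE_SIZE; /* offset &= ~PAGE_MASK */

            printf("swap_map page %lu, byte %lu within it\n",
                   map_page, in_page);
            return 0;
    }
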
3878                                                  3204 
3879 /*                                               3205 /*
3880  * swap_count_continued - when the original swap_map count is incremented    3206  * swap_count_continued - when the original swap_map count is incremented
3881  * from SWAP_MAP_MAX, check if there is already a continuation page to carry    3207  * from SWAP_MAP_MAX, check if there is already a continuation page to carry
3882  * into, carry if so, or else fail until a new continuation page is allocated;    3208  * into, carry if so, or else fail until a new continuation page is allocated;
3883  * when the original swap_map count is decremented from 0 with continuation,    3209  * when the original swap_map count is decremented from 0 with continuation,
3884  * borrow from the continuation and report whether it still holds more.    3210  * borrow from the continuation and report whether it still holds more.
3885  * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster    3211  * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster
3886  * lock.                                         3212  * lock.
3887  */                                              3213  */
3888 static bool swap_count_continued(struct swap_info_struct *si,    3214 static bool swap_count_continued(struct swap_info_struct *si,
3889                                  pgoff_t offset, unsigned char count)    3215                                  pgoff_t offset, unsigned char count)
3890 {                                                3216 {
3891         struct page *head;                       3217         struct page *head;
3892         struct page *page;                       3218         struct page *page;
3893         unsigned char *map;                      3219         unsigned char *map;
3894         bool ret;                             << 
3895                                                  3220 
3896         head = vmalloc_to_page(si->swap_map + offset);    3221         head = vmalloc_to_page(si->swap_map + offset);
3897         if (page_private(head) != SWP_CONTINUED) {    3222         if (page_private(head) != SWP_CONTINUED) {
3898                 BUG_ON(count & COUNT_CONTINUED);    3223                 BUG_ON(count & COUNT_CONTINUED);
3899                 return false;           /* need to add count continuation */    3224                 return false;           /* need to add count continuation */
3900         }                                        3225         }
3901                                                  3226 
3902         spin_lock(&si->cont_lock);            << 
3903         offset &= ~PAGE_MASK;                    3227         offset &= ~PAGE_MASK;
3904         page = list_next_entry(head, lru);    !! 3228         page = list_entry(head->lru.next, struct page, lru);
3905         map = kmap_local_page(page) + offset; !! 3229         map = kmap_atomic(page) + offset;
3906                                                  3230 
3907         if (count == SWAP_MAP_MAX)      /* initial increment from swap_map */    3231         if (count == SWAP_MAP_MAX)      /* initial increment from swap_map */
3908                 goto init_map;          /* jump over SWAP_CONT_MAX checks */    3232                 goto init_map;          /* jump over SWAP_CONT_MAX checks */
3909                                                  3233 
3910         if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) { /* incrementing */    3234         if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) { /* incrementing */
3911                 /*                                  3235                 /*
3912                  * Think of how you add 1 to 999    3236                  * Think of how you add 1 to 999
3913                  */                                 3237                  */
3914                 while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) {    3238                 while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) {
3915                         kunmap_local(map);  !! 3239                         kunmap_atomic(map);
3916                         page = list_next_entry(page, lru);  !! 3240                         page = list_entry(page->lru.next, struct page, lru);
3917                         BUG_ON(page == head);    3241                         BUG_ON(page == head);
3918                         map = kmap_local_page(page) + offset;  !! 3242                         map = kmap_atomic(page) + offset;
3919                 }                                3243                 }
3920                 if (*map == SWAP_CONT_MAX) {     3244                 if (*map == SWAP_CONT_MAX) {
3921                         kunmap_local(map);    !! 3245                         kunmap_atomic(map);
3922                         page = list_next_entry(page, lru);  !! 3246                         page = list_entry(page->lru.next, struct page, lru);
3923                         if (page == head) {   !! 3247                         if (page == head)
3924                                 ret = false;  !! 3248                                 return false;   /* add count continuation */
3925                                 goto out;     !! 3249                         map = kmap_atomic(page) + offset;
3926                         }                     << 
3927                         map = kmap_local_page(page) + offset;  << 
3928 init_map:               *map = 0;               /* we didn't zero the page */    3250 init_map:               *map = 0;               /* we didn't zero the page */
3929                 }                                3251                 }
3930                 *map += 1;                       3252                 *map += 1;
3931                 kunmap_local(map);            !! 3253                 kunmap_atomic(map);
3932                 while ((page = list_prev_entry(page, lru)) != head) {  !! 3254                 page = list_entry(page->lru.prev, struct page, lru);
3933                         map = kmap_local_page(page) + offset;  !! 3255                 while (page != head) {
                                                   >> 3256                         map = kmap_atomic(page) + offset;
3934                         *map = COUNT_CONTINUED;    3257                         *map = COUNT_CONTINUED;
3935                         kunmap_local(map);    !! 3258                         kunmap_atomic(map);
                                                   >> 3259                         page = list_entry(page->lru.prev, struct page, lru);
3936                 }                                3260                 }
3937                 ret = true;                   !! 3261                 return true;                    /* incremented */
3938                                                  3262 
3939         } else {                                /* decrementing */    3263         } else {                                /* decrementing */
3940                 /*                                  3264                 /*
3941                  * Think of how you subtract 1 from 1000    3265                  * Think of how you subtract 1 from 1000
3942                  */                                 3266                  */
3943                 BUG_ON(count != COUNT_CONTINUED);    3267                 BUG_ON(count != COUNT_CONTINUED);
3944                 while (*map == COUNT_CONTINUED) {    3268                 while (*map == COUNT_CONTINUED) {
3945                         kunmap_local(map);  !! 3269                         kunmap_atomic(map);
3946                         page = list_next_entry(page, lru);  !! 3270                         page = list_entry(page->lru.next, struct page, lru);
3947                         BUG_ON(page == head);    3271                         BUG_ON(page == head);
3948                         map = kmap_local_page(page) + offset;  !! 3272                         map = kmap_atomic(page) + offset;
3949                 }                                3273                 }
3950                 BUG_ON(*map == 0);               3274                 BUG_ON(*map == 0);
3951                 *map -= 1;                       3275                 *map -= 1;
3952                 if (*map == 0)                   3276                 if (*map == 0)
3953                         count = 0;               3277                         count = 0;
3954                 kunmap_local(map);            !! 3278                 kunmap_atomic(map);
3955                 while ((page = list_prev_entry(page, lru)) != head) {  !! 3279                 page = list_entry(page->lru.prev, struct page, lru);
3956                         map = kmap_local_page(page) + offset;  !! 3280                 while (page != head) {
                                                   >> 3281                         map = kmap_atomic(page) + offset;
3957                         *map = SWAP_CONT_MAX | count;    3282                         *map = SWAP_CONT_MAX | count;
3958                         count = COUNT_CONTINUED;    3283                         count = COUNT_CONTINUED;
3959                         kunmap_local(map);    !! 3284                         kunmap_atomic(map);
                                                   >> 3285                         page = list_entry(page->lru.prev, struct page, lru);
3960                 }                                3286                 }
3961                 ret = count == COUNT_CONTINUED;  !! 3287                 return count == COUNT_CONTINUED;
3962         }                                        3288         }
3963 out:                                          << 
3964         spin_unlock(&si->cont_lock);          << 
3965         return ret;                           << 
3966 }                                                3289 }
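
A stand-alone model of the carry/borrow walk above: digit[0] plays the role
of the in-place swap_map count capped at SWAP_MAP_MAX, and the higher digits
play the continuation pages. The COUNT_CONTINUED marker bits and the kmap
dance are omitted; only the "add 1 to 999 / subtract 1 from 1000" arithmetic
is modeled:

    #include <assert.h>
    #include <stdio.h>

    #define SWAP_MAP_MAX    0x3e    /* capacity of the in-place count */
    #define SWAP_CONT_MAX   0x7f    /* capacity of one continuation digit */
    #define NDIGITS         4       /* digit[0] = swap_map, rest = continuations */

    static unsigned int digit[NDIGITS];

    static void inc(void)                   /* "add 1 to 999" */
    {
            int i;

            if (digit[0] < SWAP_MAP_MAX) {
                    digit[0]++;
                    return;
            }
            for (i = 1; i < NDIGITS; i++) {
                    if (digit[i] < SWAP_CONT_MAX) {
                            digit[i]++;
                            return;
                    }
                    digit[i] = 0;   /* carry into the next continuation page */
            }
            assert(0);              /* would need a new continuation page */
    }

    static void dec(void)                   /* "subtract 1 from 1000" */
    {
            int i, in_use = 0;

            for (i = 1; i < NDIGITS; i++)
                    if (digit[i])
                            in_use = 1;
            if (!in_use) {                  /* no continuation: plain decrement */
                    assert(digit[0] > 0);
                    digit[0]--;
                    return;
            }
            for (i = 1; digit[i] == 0; i++)
                    digit[i] = SWAP_CONT_MAX;       /* borrow through the zeros */
            digit[i]--;
    }

    int main(void)
    {
            int i;

            for (i = 0; i < SWAP_MAP_MAX + 3; i++)  /* push past the in-place max */
                    inc();
            printf("map=%u cont=%u\n", digit[0], digit[1]);  /* 62 3 */
            for (i = 0; i < 5; i++)
                    dec();
            printf("map=%u cont=%u\n", digit[0], digit[1]);  /* 60 0 */
            return 0;
    }
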
3967                                                  3290 
3968 /*                                               3291 /*
3969  * free_swap_count_continuations - swapoff free all the continuation pages    3292  * free_swap_count_continuations - swapoff free all the continuation pages
3970  * appended to the swap_map, after swap_map is quiesced, before vfree'ing it.    3293  * appended to the swap_map, after swap_map is quiesced, before vfree'ing it.
3971  */                                              3294  */
3972 static void free_swap_count_continuations(struct swap_info_struct *si)    3295 static void free_swap_count_continuations(struct swap_info_struct *si)
3973 {                                                3296 {
3974         pgoff_t offset;                          3297         pgoff_t offset;
3975                                                  3298 
3976         for (offset = 0; offset < si->max; offset += PAGE_SIZE) {    3299         for (offset = 0; offset < si->max; offset += PAGE_SIZE) {
3977                 struct page *head;                  3300                 struct page *head;
3978                 head = vmalloc_to_page(si->swap_map + offset);    3301                 head = vmalloc_to_page(si->swap_map + offset);
3979                 if (page_private(head)) {        3302                 if (page_private(head)) {
3980                         struct page *page, *next;    3303                         struct page *page, *next;
3981                                                     3304 
3982                         list_for_each_entry_safe(page, next, &head->lru, lru) {    3305                         list_for_each_entry_safe(page, next, &head->lru, lru) {
3983                                 list_del(&page->lru);    3306                                 list_del(&page->lru);
3984                                 __free_page(page);    3307                                 __free_page(page);
3985                         }                        3308                         }
3986                 }                                3309                 }
3987         }                                        3310         }
3988 }                                                3311 }
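
The _safe iterator matters above because every node is freed during the
walk. A minimal stand-alone illustration with a hand-rolled singly linked
list (not the kernel list API):

    #include <stdio.h>
    #include <stdlib.h>

    struct node {
            struct node *next;
            int v;
    };

    int main(void)
    {
            struct node *head = NULL, *n, *next;
            int i;

            for (i = 0; i < 3; i++) {       /* build a small list */
                    n = malloc(sizeof(*n));
                    n->v = i;
                    n->next = head;
                    head = n;
            }
            for (n = head; n; n = next) {   /* "safe" walk: save next first */
                    next = n->next;
                    printf("freeing %d\n", n->v);
                    free(n);                /* n->next is dead after this */
            }
            return 0;
    }
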
3989                                               << 
3990 #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)  << 
3991 void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp)  << 
3992 {                                             << 
3993         struct swap_info_struct *si, *next;   << 
3994         int nid = folio_nid(folio);           << 
3995                                               << 
3996         if (!(gfp & __GFP_IO))                << 
3997                 return;                       << 
3998                                               << 
3999         if (!__has_usable_swap())             << 
4000                 return;                       << 
4001                                               << 
4002         if (!blk_cgroup_congested())          << 
4003                 return;                       << 
4004                                               << 
4005         /*                                    << 
4006          * We've already scheduled a throttle, avoid taking the global swap  << 
4007          * lock.                              << 
4008          */                                   << 
4009         if (current->throttle_disk)           << 
4010                 return;                       << 
4011                                               << 
4012         spin_lock(&swap_avail_lock);          << 
4013         plist_for_each_entry_safe(si, next, &swap_avail_heads[nid],  << 
4014                                   avail_lists[nid]) {  << 
4015                 if (si->bdev) {               << 
4016                         blkcg_schedule_throttle(si->bdev->bd_disk, true);  << 
4017                         break;                << 
4018                 }                             << 
4019         }                                     << 
4020         spin_unlock(&swap_avail_lock);        << 
4021 }                                             << 
4022 #endif                                        << 
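
The function above is structured as a series of cheap, lock-free early
returns, taking swap_avail_lock only once throttling is actually plausible.
A stand-alone model of that guard ordering; every predicate below is a stub,
not kernel API:

    #include <stdbool.h>
    #include <stdio.h>

    static bool gfp_allows_io = true;       /* cf. gfp & __GFP_IO */
    static bool has_usable_swap = true;     /* cf. __has_usable_swap() */
    static bool cgroup_congested;           /* cf. blk_cgroup_congested() */
    static bool already_throttled;          /* cf. current->throttle_disk */

    static int slow_path_entries;           /* counts lock acquisitions */

    static void throttle_swaprate_model(void)
    {
            if (!gfp_allows_io)
                    return;
            if (!has_usable_swap)
                    return;
            if (!cgroup_congested)
                    return;                 /* common case stays lock-free */
            if (already_throttled)
                    return;
            slow_path_entries++;            /* only now pay for the lock */
    }

    int main(void)
    {
            throttle_swaprate_model();      /* not congested: early return */
            cgroup_congested = true;
            throttle_swaprate_model();      /* congested: slow path once */
            printf("slow path taken %d time(s)\n", slow_path_entries);
            return 0;
    }
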
4023                                               << 
4024 static int __init swapfile_init(void)         << 
4025 {                                             << 
4026         int nid;                              << 
4027                                               << 
4028         swap_avail_heads = kmalloc_array(nr_node_ids, sizeof(struct plist_head),  << 
4029                                          GFP_KERNEL);  << 
4030         if (!swap_avail_heads) {              << 
4031                 pr_emerg("Not enough memory for swap heads, swap is disabled\n");  << 
4032                 return -ENOMEM;               << 
4033         }                                     << 
4034                                               << 
4035         for_each_node(nid)                    << 
4036                 plist_head_init(&swap_avail_heads[nid]);  << 
4037                                               << 
4038         swapfile_maximum_size = arch_max_swapfile_size();  << 
4039                                               << 
4040 #ifdef CONFIG_MIGRATION                       << 
4041         if (swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS))  << 
4042                 swap_migration_ad_supported = true;  << 
4043 #endif  /* CONFIG_MIGRATION */                << 
4044                                               << 
4045         return 0;                             << 
4046 }                                             << 
4047 subsys_initcall(swapfile_init);               << 
4048                                                  3312 
