
TOMOYO Linux Cross Reference
Linux/mm/swapfile.c


Diff markup

Differences between /mm/swapfile.c (Version linux-6.12-rc7) and /mm/swapfile.c (Version linux-4.4.302)


  1 // SPDX-License-Identifier: GPL-2.0-only       << 
  2 /*                                                  1 /*
  3  *  linux/mm/swapfile.c                             2  *  linux/mm/swapfile.c
  4  *                                                  3  *
  5  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds      4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
  6  *  Swap reorganised 29.12.95, Stephen Tweedie      5  *  Swap reorganised 29.12.95, Stephen Tweedie
  7  */                                                 6  */
  8                                                     7 
  9 #include <linux/blkdev.h>                      << 
 10 #include <linux/mm.h>                               8 #include <linux/mm.h>
 11 #include <linux/sched/mm.h>                    << 
 12 #include <linux/sched/task.h>                  << 
 13 #include <linux/hugetlb.h>                          9 #include <linux/hugetlb.h>
 14 #include <linux/mman.h>                            10 #include <linux/mman.h>
 15 #include <linux/slab.h>                            11 #include <linux/slab.h>
 16 #include <linux/kernel_stat.h>                     12 #include <linux/kernel_stat.h>
 17 #include <linux/swap.h>                            13 #include <linux/swap.h>
 18 #include <linux/vmalloc.h>                         14 #include <linux/vmalloc.h>
 19 #include <linux/pagemap.h>                         15 #include <linux/pagemap.h>
 20 #include <linux/namei.h>                           16 #include <linux/namei.h>
 21 #include <linux/shmem_fs.h>                        17 #include <linux/shmem_fs.h>
 22 #include <linux/blk-cgroup.h>                  !!  18 #include <linux/blkdev.h>
 23 #include <linux/random.h>                          19 #include <linux/random.h>
 24 #include <linux/writeback.h>                       20 #include <linux/writeback.h>
 25 #include <linux/proc_fs.h>                         21 #include <linux/proc_fs.h>
 26 #include <linux/seq_file.h>                        22 #include <linux/seq_file.h>
 27 #include <linux/init.h>                            23 #include <linux/init.h>
 28 #include <linux/ksm.h>                             24 #include <linux/ksm.h>
 29 #include <linux/rmap.h>                            25 #include <linux/rmap.h>
 30 #include <linux/security.h>                        26 #include <linux/security.h>
 31 #include <linux/backing-dev.h>                     27 #include <linux/backing-dev.h>
 32 #include <linux/mutex.h>                           28 #include <linux/mutex.h>
 33 #include <linux/capability.h>                      29 #include <linux/capability.h>
 34 #include <linux/syscalls.h>                        30 #include <linux/syscalls.h>
 35 #include <linux/memcontrol.h>                      31 #include <linux/memcontrol.h>
 36 #include <linux/poll.h>                            32 #include <linux/poll.h>
 37 #include <linux/oom.h>                             33 #include <linux/oom.h>
                                                   >>  34 #include <linux/frontswap.h>
 38 #include <linux/swapfile.h>                        35 #include <linux/swapfile.h>
 39 #include <linux/export.h>                          36 #include <linux/export.h>
 40 #include <linux/swap_slots.h>                  << 
 41 #include <linux/sort.h>                        << 
 42 #include <linux/completion.h>                  << 
 43 #include <linux/suspend.h>                     << 
 44 #include <linux/zswap.h>                       << 
 45 #include <linux/plist.h>                       << 
 46                                                    37 
                                                   >>  38 #include <asm/pgtable.h>
 47 #include <asm/tlbflush.h>                          39 #include <asm/tlbflush.h>
 48 #include <linux/swapops.h>                         40 #include <linux/swapops.h>
 49 #include <linux/swap_cgroup.h>                     41 #include <linux/swap_cgroup.h>
 50 #include "internal.h"                          << 
 51 #include "swap.h"                              << 
 52                                                    42 
 53 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,     43 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 54                                  unsigned char);                         44                                  unsigned char);
 55 static void free_swap_count_continuations(struct swap_info_struct *);    45 static void free_swap_count_continuations(struct swap_info_struct *);
 56 static void swap_entry_range_free(struct swap_info_struct *si, swp_entry_t entry,  !!  46 static sector_t map_swap_entry(swp_entry_t, struct block_device**);
 57                                   unsigned int nr_pages);                          << 
 58 static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,    << 
 59                              unsigned int nr_entries);                            << 
 60 static bool folio_swapcache_freeable(struct folio *folio);                         << 
 61 static struct swap_cluster_info *lock_cluster_or_swap_info(                        << 
 62                 struct swap_info_struct *si, unsigned long offset);               << 
 63 static void unlock_cluster_or_swap_info(struct swap_info_struct *si,               << 
 64                                         struct swap_cluster_info *ci);            << 
 65                                                    47 
 66 static DEFINE_SPINLOCK(swap_lock);             !!  48 DEFINE_SPINLOCK(swap_lock);
 67 static unsigned int nr_swapfiles;                  49 static unsigned int nr_swapfiles;
 68 atomic_long_t nr_swap_pages;                       50 atomic_long_t nr_swap_pages;
 69 /*                                                                        << 
 70  * Some modules use swappable objects and may try to swap them out under  << 
 71  * memory pressure (via the shrinker). Before doing so, they may wish to  << 
 72  * check to see if any swap space is available.                           << 
 73  */                                                                       << 
 74 EXPORT_SYMBOL_GPL(nr_swap_pages);                                         << 
 75 /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */     51 /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
 76 long total_swap_pages;                             52 long total_swap_pages;
 77 static int least_priority = -1;                !!  53 static int least_priority;
 78 unsigned long swapfile_maximum_size;           << 
 79 #ifdef CONFIG_MIGRATION                        << 
 80 bool swap_migration_ad_supported;              << 
 81 #endif  /* CONFIG_MIGRATION */                 << 
 82                                                    54 
 83 static const char Bad_file[] = "Bad swap file entry ";            55 static const char Bad_file[] = "Bad swap file entry ";
 84 static const char Unused_file[] = "Unused swap file entry ";      56 static const char Unused_file[] = "Unused swap file entry ";
 85 static const char Bad_offset[] = "Bad swap offset entry ";        57 static const char Bad_offset[] = "Bad swap offset entry ";
 86 static const char Unused_offset[] = "Unused swap offset entry ";  58 static const char Unused_offset[] = "Unused swap offset entry ";
 87                                                    59 
 88 /*                                                 60 /*
 89  * all active swap_info_structs                    61  * all active swap_info_structs
 90  * protected with swap_lock, and ordered by priority.              62  * protected with swap_lock, and ordered by priority.
 91  */                                                63  */
 92 static PLIST_HEAD(swap_active_head);           !!  64 PLIST_HEAD(swap_active_head);
 93                                                    65 
 94 /*                                                 66 /*
 95  * all available (active, not full) swap_info_     67  * all available (active, not full) swap_info_structs
 96  * protected with swap_avail_lock, ordered by      68  * protected with swap_avail_lock, ordered by priority.
 97  * This is used by folio_alloc_swap() instead of swap_active_head       !!  69  * This is used by get_swap_page() instead of swap_active_head
 98  * because swap_active_head includes all swap_info_structs,                 70  * because swap_active_head includes all swap_info_structs,
 99  * but folio_alloc_swap() doesn't need to look at full ones.            !!  71  * but get_swap_page() doesn't need to look at full ones.
100  * This uses its own lock instead of swap_lock because when a               72  * This uses its own lock instead of swap_lock because when a
101  * swap_info_struct changes between not-full/full, it needs to              73  * swap_info_struct changes between not-full/full, it needs to
102  * add/remove itself to/from this list, but the swap_info_struct->lock      74  * add/remove itself to/from this list, but the swap_info_struct->lock
103  * is held and the locking order requires swap_lock to be taken             75  * is held and the locking order requires swap_lock to be taken
104  * before any swap_info_struct->lock.              76  * before any swap_info_struct->lock.
105  */                                                77  */
106 static struct plist_head *swap_avail_heads;    !!  78 static PLIST_HEAD(swap_avail_head);
107 static DEFINE_SPINLOCK(swap_avail_lock);           79 static DEFINE_SPINLOCK(swap_avail_lock);
108                                                    80 
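The locking-order rule in the comment above (take swap_lock before any swap_info_struct->lock) is the standard discipline for avoiding ABBA deadlocks between a global lock and per-object locks. A minimal userspace sketch of the same rule, using pthreads and hypothetical names (global_lock, struct device), not the kernel API:

    #include <pthread.h>
    #include <stdio.h>

    /* Hypothetical model: one global lock plus one lock per device. */
    static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;

    struct device {
            pthread_mutex_t lock;
            int full;
    };

    /*
     * Every path that needs both locks must take global_lock first,
     * then dev->lock; two threads taking them in opposite orders
     * could deadlock (ABBA).
     */
    static void mark_full(struct device *dev, int full)
    {
            pthread_mutex_lock(&global_lock);
            pthread_mutex_lock(&dev->lock);
            dev->full = full;   /* e.g. move the device on/off an "available" list */
            pthread_mutex_unlock(&dev->lock);
            pthread_mutex_unlock(&global_lock);
    }

    int main(void)
    {
            struct device d = { PTHREAD_MUTEX_INITIALIZER, 0 };
            mark_full(&d, 1);
            printf("full=%d\n", d.full);
            return 0;
    }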
109 static struct swap_info_struct *swap_info[MAX_SWAPFILES];  !!  81 struct swap_info_struct *swap_info[MAX_SWAPFILES];
110                                                    82 
111 static DEFINE_MUTEX(swapon_mutex);                 83 static DEFINE_MUTEX(swapon_mutex);
112                                                    84 
113 static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait)     85 static DECLARE_WAIT_QUEUE_HEAD(proc_poll_wait);
114 /* Activity counter to indicate that a swapon or swapoff has occurred */     86 /* Activity counter to indicate that a swapon or swapoff has occurred */
115 static atomic_t proc_poll_event = ATOMIC_INIT(     87 static atomic_t proc_poll_event = ATOMIC_INIT(0);
116                                                    88 
117 atomic_t nr_rotate_swap = ATOMIC_INIT(0);      << 
118                                                << 
119 static struct swap_info_struct *swap_type_to_swap_info(int type)  << 
120 {                                              << 
121         if (type >= MAX_SWAPFILES)             << 
122                 return NULL;                   << 
123                                                << 
124         return READ_ONCE(swap_info[type]); /* rcu_dereference() */  << 
125 }                                              << 
126                                                << 
127 static inline unsigned char swap_count(unsigned char ent)     89 static inline unsigned char swap_count(unsigned char ent)
128 {                                                  90 {
129         return ent & ~SWAP_HAS_CACHE;   /* may include COUNT_CONTINUED flag */  !!  91         return ent & ~SWAP_HAS_CACHE;   /* may include SWAP_HAS_CONT flag */
130 }                                                  92 }
131                                                    93 
132 /* Reclaim the swap entry anyway if possible */                            !!  94 /* returns 1 if swap entry is freed */
133 #define TTRS_ANYWAY             0x1                                        !!  95 static int
134 /*                                                                         !!  96 __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
135  * Reclaim the swap entry if there are no more mappings of the             << 
136  * corresponding page                                                      << 
137  */                                                                        << 
138 #define TTRS_UNMAPPED           0x2                                        << 
139 /* Reclaim the swap entry if swap is getting full */                       << 
140 #define TTRS_FULL               0x4                                        << 
141 /* Reclaim directly, bypass the slot cache and don't touch device lock */  << 
142 #define TTRS_DIRECT             0x8                                        << 
143                                                << 
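As a reading aid, the reclaim decision these TTRS_* flags feed (used later in __try_to_reclaim_swap()) can be modelled in isolation. A minimal sketch; the two predicates below are stand-in stubs, not the kernel's folio_mapped()/mem_cgroup_swap_full():

    #include <stdio.h>
    #include <stdbool.h>

    #define TTRS_ANYWAY   0x1
    #define TTRS_UNMAPPED 0x2
    #define TTRS_FULL     0x4

    /* Stand-in stubs for the real predicates. */
    static bool mapped = false;     /* does the folio still have page-table mappings? */
    static bool swap_full = true;   /* is swap space under pressure? */

    /* Reclaim if asked unconditionally, or if the gating condition
     * named by each flag holds. */
    static bool need_reclaim(unsigned long flags)
    {
            return (flags & TTRS_ANYWAY) ||
                   ((flags & TTRS_UNMAPPED) && !mapped) ||
                   ((flags & TTRS_FULL) && swap_full);
    }

    int main(void)
    {
            printf("%d\n", need_reclaim(TTRS_UNMAPPED | TTRS_FULL)); /* prints 1 */
            return 0;
    }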
144 static bool swap_is_has_cache(struct swap_info_struct *si,         << 
145                               unsigned long offset, int nr_pages)  << 
146 {                                                                  << 
147         unsigned char *map = si->swap_map + offset;                << 
148         unsigned char *map_end = map + nr_pages;                   << 
149                                                                    << 
150         do {                                                       << 
151                 VM_BUG_ON(!(*map & SWAP_HAS_CACHE));               << 
152                 if (*map != SWAP_HAS_CACHE)                        << 
153                         return false;                              << 
154         } while (++map < map_end);                                 << 
155                                                                    << 
156         return true;                                               << 
157 }                                                                  << 
158                                                << 
159 static bool swap_is_last_map(struct swap_info_struct *si,              << 
160                 unsigned long offset, int nr_pages, bool *has_cache)   << 
161 {                                                                      << 
162         unsigned char *map = si->swap_map + offset;                    << 
163         unsigned char *map_end = map + nr_pages;                       << 
164         unsigned char count = *map;                                    << 
165                                                                        << 
166         if (swap_count(count) != 1)                                    << 
167                 return false;                                          << 
168                                                                        << 
169         while (++map < map_end) {                                      << 
170                 if (*map != count)                                     << 
171                         return false;                                  << 
172         }                                                              << 
173                                                                        << 
174         *has_cache = !!(count & SWAP_HAS_CACHE);                       << 
175         return true;                                                   << 
176 }                                                                      << 
177                                                << 
178 /*                                                                               << 
179  * returns number of pages in the folio that backs the swap entry. If positive,  << 
180  * the folio was reclaimed. If negative, the folio was not reclaimed. If 0, no   << 
181  * folio was associated with the swap entry.                                     << 
182  */                                                                              << 
183 static int __try_to_reclaim_swap(struct swap_info_struct *si,                    << 
184                                  unsigned long offset, unsigned long flags)      << 
185 {                                                  97 {
186         swp_entry_t entry = swp_entry(si->type, offset);                   98         swp_entry_t entry = swp_entry(si->type, offset);
187         struct address_space *address_space = swap_address_space(entry);  !!  99         struct page *page;
188         struct swap_cluster_info *ci;                                      !! 100         int ret = 0;
189         struct folio *folio;                                               << 
190         int ret, nr_pages;                                                 << 
191         bool need_reclaim;                                                 << 
192                                                   101 
193         folio = filemap_get_folio(address_space, swap_cache_index(entry));  !! 102         page = find_get_page(swap_address_space(entry), entry.val);
194         if (IS_ERR(folio))                     !! 103         if (!page)
195                 return 0;                         104                 return 0;
196                                                << 
197         nr_pages = folio_nr_pages(folio);      << 
198         ret = -nr_pages;                       << 
199                                                << 
200         /*                                        105         /*
201          * When this function is called from scan_swap_map_slots() and it's      !! 106          * This function is called from scan_swap_map() and it's called
202          * called by vmscan.c at reclaiming folios. So we hold a folio lock       !! 107          * by vmscan.c at reclaiming pages. So, we hold a lock on a page, here.
203          * here. We have to use trylock for avoiding deadlock. This is a special  !! 108          * We have to use trylock for avoiding deadlock. This is a special
204          * case and you should use folio_free_swap() with explicit folio_lock()   !! 109          * case and you should use try_to_free_swap() with explicit lock_page()
205          * in usual operations.                   110          * in usual operations.
206          */                                       111          */
207         if (!folio_trylock(folio))             !! 112         if (trylock_page(page)) {
208                 goto out;                      !! 113                 ret = try_to_free_swap(page);
209                                                !! 114                 unlock_page(page);
210         /* offset could point to the middle of a large folio */  !! 115         }
211         entry = folio->swap;                   !! 116         page_cache_release(page);
212         offset = swp_offset(entry);            << 
213                                                << 
214         need_reclaim = ((flags & TTRS_ANYWAY) ||                                << 
215                         ((flags & TTRS_UNMAPPED) && !folio_mapped(folio)) ||    << 
216                         ((flags & TTRS_FULL) && mem_cgroup_swap_full(folio)));  << 
217         if (!need_reclaim || !folio_swapcache_freeable(folio))                  << 
218                 goto out_unlock;               << 
219                                                << 
220         /*                                     << 
221          * It's safe to delete the folio from swap cache only if the folio's     << 
222          * swap_map is HAS_CACHE only, which means the slots have no page table  << 
223          * reference or pending writeback, and can't be allocated to others.     << 
224          */                                    << 
225         ci = lock_cluster_or_swap_info(si, offset);              << 
226         need_reclaim = swap_is_has_cache(si, offset, nr_pages);  << 
227         unlock_cluster_or_swap_info(si, ci);   << 
228         if (!need_reclaim)                     << 
229                 goto out_unlock;               << 
230                                                << 
231         if (!(flags & TTRS_DIRECT)) {          << 
232                 /* Free through slot cache */  << 
233                 delete_from_swap_cache(folio); << 
234                 folio_set_dirty(folio);        << 
235                 ret = nr_pages;                << 
236                 goto out_unlock;               << 
237         }                                      << 
238                                                << 
239         xa_lock_irq(&address_space->i_pages);          << 
240         __delete_from_swap_cache(folio, entry, NULL);  << 
241         xa_unlock_irq(&address_space->i_pages);        << 
242         folio_ref_sub(folio, nr_pages);        << 
243         folio_set_dirty(folio);                << 
244                                                << 
245         spin_lock(&si->lock);                  << 
246         /* Only simple page folio can be backed by zswap */  << 
247         if (nr_pages == 1)                     << 
248                 zswap_invalidate(entry);       << 
249         swap_entry_range_free(si, entry, nr_pages);  << 
250         spin_unlock(&si->lock);                << 
251         ret = nr_pages;                        << 
252 out_unlock:                                    << 
253         folio_unlock(folio);                   << 
254 out:                                           << 
255         folio_put(folio);                      << 
256         return ret;                               117         return ret;
257 }                                                 118 }
258                                                   119 
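The folio_trylock() in the function above is the classic trylock-to-avoid-deadlock move: this path can be entered from reclaim while another context already holds the lock, so it backs off instead of blocking. A userspace analogue of the same pattern with pthread_mutex_trylock (names hypothetical):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Returns 0 when the lock is contended; mirrors the "goto out"
     * bail-out rather than sleeping while other locks are held. */
    static int try_reclaim(void)
    {
            if (pthread_mutex_trylock(&page_lock) != 0)
                    return 0;            /* contended: skip, don't deadlock */
            /* ... reclaim work under the lock ... */
            pthread_mutex_unlock(&page_lock);
            return 1;
    }

    int main(void)
    {
            printf("reclaimed=%d\n", try_reclaim());
            return 0;
    }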
259 static inline struct swap_extent *first_se(struct swap_info_struct *sis)  << 
260 {                                                                          << 
261         struct rb_node *rb = rb_first(&sis->swap_extent_root);             << 
262         return rb_entry(rb, struct swap_extent, rb_node);                  << 
263 }                                                                          << 
264                                                                            << 
265 static inline struct swap_extent *next_se(struct swap_extent *se)          << 
266 {                                                                          << 
267         struct rb_node *rb = rb_next(&se->rb_node);                        << 
268         return rb ? rb_entry(rb, struct swap_extent, rb_node) : NULL;      << 
269 }                                                                          << 
270                                                << 
271 /*                                                120 /*
272  * swapon tell device that all the old swap co    121  * swapon tell device that all the old swap contents can be discarded,
273  * to allow the swap device to optimize its we    122  * to allow the swap device to optimize its wear-levelling.
274  */                                               123  */
275 static int discard_swap(struct swap_info_struc    124 static int discard_swap(struct swap_info_struct *si)
276 {                                                 125 {
277         struct swap_extent *se;                   126         struct swap_extent *se;
278         sector_t start_block;                     127         sector_t start_block;
279         sector_t nr_blocks;                       128         sector_t nr_blocks;
280         int err = 0;                              129         int err = 0;
281                                                   130 
282         /* Do not discard the swap header page    131         /* Do not discard the swap header page! */
283         se = first_se(si);                     !! 132         se = &si->first_swap_extent;
284         start_block = (se->start_block + 1) <<    133         start_block = (se->start_block + 1) << (PAGE_SHIFT - 9);
285         nr_blocks = ((sector_t)se->nr_pages -     134         nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
286         if (nr_blocks) {                          135         if (nr_blocks) {
287                 err = blkdev_issue_discard(si->bdev, start_block,     136                 err = blkdev_issue_discard(si->bdev, start_block,
288                                 nr_blocks, GFP_KERNEL);            !! 137                                 nr_blocks, GFP_KERNEL, 0);
289                 if (err)                          138                 if (err)
290                         return err;               139                         return err;
291                 cond_resched();                   140                 cond_resched();
292         }                                         141         }
293                                                   142 
294         for (se = next_se(se); se; se = next_se(se)) {  !! 143         list_for_each_entry(se, &si->first_swap_extent.list, list) {
295                 start_block = se->start_block     144                 start_block = se->start_block << (PAGE_SHIFT - 9);
296                 nr_blocks = (sector_t)se->nr_p    145                 nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
297                                                   146 
298                 err = blkdev_issue_discard(si->bdev, start_block,     147                 err = blkdev_issue_discard(si->bdev, start_block,
299                                 nr_blocks, GFP_KERNEL);            !! 148                                 nr_blocks, GFP_KERNEL, 0);
300                 if (err)                          149                 if (err)
301                         break;                    150                         break;
302                                                   151 
303                 cond_resched();                   152                 cond_resched();
304         }                                         153         }
305         return err;             /* That will often be -EOPNOTSUPP */     154         return err;             /* That will often be -EOPNOTSUPP */
306 }                                                 155 }
307                                                   156 
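The << (PAGE_SHIFT - 9) conversions in discard_swap() map page-sized swap extents to 512-byte disk sectors, while the +1/-1 skip the swap header page. A tiny self-contained check of that arithmetic, assuming 4 KiB pages (so one page is 8 sectors):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12   /* assumption: 4 KiB pages */

    int main(void)
    {
            uint64_t start_block = 100;   /* extent start, in page units */
            uint64_t nr_pages = 16;

            /* Skip the header page, then convert pages to 512-byte sectors. */
            uint64_t first_sector = (start_block + 1) << (PAGE_SHIFT - 9);
            uint64_t nr_sectors = (nr_pages - 1) << (PAGE_SHIFT - 9);

            printf("discard %llu sectors starting at sector %llu\n",
                   (unsigned long long)nr_sectors,
                   (unsigned long long)first_sector);
            return 0;
    }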
308 static struct swap_extent *                                                 << 
309 offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset)   << 
310 {                                                                           << 
311         struct swap_extent *se;                                             << 
312         struct rb_node *rb;                                                 << 
313                                                                             << 
314         rb = sis->swap_extent_root.rb_node;                                 << 
315         while (rb) {                                                        << 
316                 se = rb_entry(rb, struct swap_extent, rb_node);             << 
317                 if (offset < se->start_page)                                << 
318                         rb = rb->rb_left;                                   << 
319                 else if (offset >= se->start_page + se->nr_pages)           << 
320                         rb = rb->rb_right;                                  << 
321                 else                                                        << 
322                         return se;                                          << 
323         }                                                                   << 
324         /* It *must* be present */                                          << 
325         BUG();                                                              << 
326 }                                                                           << 
327                                                << 
328 sector_t swap_folio_sector(struct folio *folio)                      << 
329 {                                                                    << 
330         struct swap_info_struct *sis = swp_swap_info(folio->swap);   << 
331         struct swap_extent *se;                                      << 
332         sector_t sector;                                             << 
333         pgoff_t offset;                                              << 
334                                                                      << 
335         offset = swp_offset(folio->swap);                            << 
336         se = offset_to_swap_extent(sis, offset);                     << 
337         sector = se->start_block + (offset - se->start_page);        << 
338         return sector << (PAGE_SHIFT - 9);                           << 
339 }                                                                    << 
340                                                << 
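offset_to_swap_extent() is a range lookup: find the extent whose [start_page, start_page + nr_pages) interval contains offset, after which swap_folio_sector() adds the delta. The rb-tree walk is equivalent to a binary search over sorted, non-overlapping extents; a self-contained model of that lookup (plain array standing in for the kernel rb-tree, all data hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    struct extent { uint64_t start_page, nr_pages, start_block; };

    /* Sorted, non-overlapping extents, as swap_extent_root holds them. */
    static struct extent map[] = {
            { 0,   64, 1000 },
            { 64,  32, 5000 },
            { 96, 128, 9000 },
    };

    static const struct extent *lookup(uint64_t offset)
    {
            int lo = 0, hi = (int)(sizeof(map) / sizeof(map[0])) - 1;

            while (lo <= hi) {
                    int mid = (lo + hi) / 2;
                    if (offset < map[mid].start_page)
                            hi = mid - 1;
                    else if (offset >= map[mid].start_page + map[mid].nr_pages)
                            lo = mid + 1;
                    else
                            return &map[mid];
            }
            return NULL;    /* the kernel BUG()s here: the extent must exist */
    }

    int main(void)
    {
            uint64_t offset = 70;
            const struct extent *se = lookup(offset);

            /* block of the extent start plus the delta, still in page units */
            printf("page %llu -> block %llu\n", (unsigned long long)offset,
                   (unsigned long long)(se->start_block + (offset - se->start_page)));
            return 0;
    }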
341 /*                                                157 /*
342  * swap allocation tell device that a cluster of swap can now be discarded,     158  * swap allocation tell device that a cluster of swap can now be discarded,
343  * to allow the swap device to optimize its we    159  * to allow the swap device to optimize its wear-levelling.
344  */                                               160  */
345 static void discard_swap_cluster(struct swap_i    161 static void discard_swap_cluster(struct swap_info_struct *si,
346                                  pgoff_t start    162                                  pgoff_t start_page, pgoff_t nr_pages)
347 {                                                 163 {
348         struct swap_extent *se = offset_to_swap_extent(si, start_page);  !! 164         struct swap_extent *se = si->curr_swap_extent;
                                                   >> 165         int found_extent = 0;
349                                                   166 
350         while (nr_pages) {                        167         while (nr_pages) {
351                 pgoff_t offset = start_page - se->start_page;     !! 168                 struct list_head *lh;
352                 sector_t start_block = se->start_block + offset;  << 
353                 sector_t nr_blocks = se->nr_pages - offset;       << 
354                                                << 
355                 if (nr_blocks > nr_pages)      << 
356                         nr_blocks = nr_pages;  << 
357                 start_page += nr_blocks;       << 
358                 nr_pages -= nr_blocks;         << 
359                                                << 
360                 start_block <<= PAGE_SHIFT - 9;  << 
361                 nr_blocks <<= PAGE_SHIFT - 9;  << 
362                 if (blkdev_issue_discard(si->bdev, start_block,  << 
363                                         nr_blocks, GFP_NOIO))    << 
364                         break;                 << 
365                                                   169 
366                 se = next_se(se);                                !! 170                 if (se->start_page <= start_page &&
                                                   >> 171                     start_page < se->start_page + se->nr_pages) {
                                                   >> 172                         pgoff_t offset = start_page - se->start_page;
                                                   >> 173                         sector_t start_block = se->start_block + offset;
                                                   >> 174                         sector_t nr_blocks = se->nr_pages - offset;
                                                   >> 175 
                                                   >> 176                         if (nr_blocks > nr_pages)
                                                   >> 177                                 nr_blocks = nr_pages;
                                                   >> 178                         start_page += nr_blocks;
                                                   >> 179                         nr_pages -= nr_blocks;
                                                   >> 180 
                                                   >> 181                         if (!found_extent++)
                                                   >> 182                                 si->curr_swap_extent = se;
                                                   >> 183 
                                                   >> 184                         start_block <<= PAGE_SHIFT - 9;
                                                   >> 185                         nr_blocks <<= PAGE_SHIFT - 9;
                                                   >> 186                         if (blkdev_issue_discard(si->bdev, start_block,
                                                   >> 187                                     nr_blocks, GFP_NOIO, 0))
                                                   >> 188                                 break;
                                                   >> 189                 }
                                                   >> 190 
                                                   >> 191                 lh = se->list.next;
                                                   >> 192                 se = list_entry(lh, struct swap_extent, list);
367         }                                         193         }
368 }                                                 194 }
369                                                   195 
370 #ifdef CONFIG_THP_SWAP                         << 
371 #define SWAPFILE_CLUSTER        HPAGE_PMD_NR   << 
372                                                << 
373 #define swap_entry_order(order) (order)        << 
374 #else                                          << 
375 #define SWAPFILE_CLUSTER        256               196 #define SWAPFILE_CLUSTER        256
376                                                << 
377 /*                                             << 
378  * Define swap_entry_order() as constant to let compiler to optimize  << 
379  * out some code if !CONFIG_THP_SWAP           << 
380  */                                            << 
381 #define swap_entry_order(order) 0              << 
382 #endif                                         << 
383 #define LATENCY_LIMIT           256               197 #define LATENCY_LIMIT           256
384                                                   198 
385 static inline bool cluster_is_free(struct swap_cluster_info *info)  !! 199 static inline void cluster_set_flag(struct swap_cluster_info *info,
                                                                        >> 200         unsigned int flag)
386 {                                                                      201 {
387         return info->flags & CLUSTER_FLAG_FREE;                     !! 202         info->flags = flag;
388 }                                                 203 }
389                                                   204 
390 static inline unsigned int cluster_index(struct swap_info_struct *si,    !! 205 static inline unsigned int cluster_count(struct swap_cluster_info *info)
391                                          struct swap_cluster_info *ci)  << 
392 {                                                                           206 {
393         return ci - si->cluster_info;                                    !! 207         return info->data;
394 }                                                                           208 }
395                                                                             209 
396 static inline unsigned int cluster_offset(struct swap_info_struct *si,   !! 210 static inline void cluster_set_count(struct swap_cluster_info *info,
397                                           struct swap_cluster_info *ci) !! 211                                      unsigned int c)
398 {                                                                           212 {
399         return cluster_index(si, ci) * SWAPFILE_CLUSTER;                 !! 213         info->data = c;
400 }                                                 214 }
401                                                   215 
402 static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,  !! 216 static inline void cluster_set_count_flag(struct swap_cluster_info *info,
403                                                      unsigned long offset)        !! 217                                          unsigned int c, unsigned int f)
404 {                                                 218 {
405         struct swap_cluster_info *ci;          !! 219         info->flags = f;
                                                   >> 220         info->data = c;
                                                   >> 221 }
406                                                   222 
407         ci = si->cluster_info;                 !! 223 static inline unsigned int cluster_next(struct swap_cluster_info *info)
408         if (ci) {                              !! 224 {
409                 ci += offset / SWAPFILE_CLUSTER;  !! 225         return info->data;
410                 spin_lock(&ci->lock);          << 
411         }                                      << 
412         return ci;                             << 
413 }                                                 226 }
414                                                   227 
415 static inline void unlock_cluster(struct swap_cluster_info *ci)  !! 228 static inline void cluster_set_next(struct swap_cluster_info *info,
                                                   >> 229                                     unsigned int n)
416 {                                                 230 {
417         if (ci)                                !! 231         info->data = n;
418                 spin_unlock(&ci->lock);        << 
419 }                                                 232 }
420                                                   233 
421 /*                                             !! 234 static inline void cluster_set_next_flag(struct swap_cluster_info *info,
422  * Determine the locking method in use for this device.  Return        !! 235                                          unsigned int n, unsigned int f)
423  * swap_cluster_info if SSD-style cluster-based locking is in place.   << 
424  */                                                                    << 
425 static inline struct swap_cluster_info *lock_cluster_or_swap_info(     << 
426                 struct swap_info_struct *si, unsigned long offset)     << 
427 {                                                 236 {
428         struct swap_cluster_info *ci;          !! 237         info->flags = f;
                                                   >> 238         info->data = n;
                                                   >> 239 }
429                                                   240 
430         /* Try to use fine-grained SSD-style locking if possible: */  !! 241 static inline bool cluster_is_free(struct swap_cluster_info *info)
431         ci = lock_cluster(si, offset);                                 !! 242 {
432         /* Otherwise, fall back to traditional, coarse locking: */     !! 243         return info->flags & CLUSTER_FLAG_FREE;
433         if (!ci)                               !! 244 }
434                 spin_lock(&si->lock);          << 
435                                                   245 
436         return ci;                             !! 246 static inline bool cluster_is_null(struct swap_cluster_info *info)
                                                   >> 247 {
                                                   >> 248         return info->flags & CLUSTER_FLAG_NEXT_NULL;
437 }                                                 249 }
438                                                   250 
439 static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,   !! 251 static inline void cluster_set_null(struct swap_cluster_info *info)
440                                                struct swap_cluster_info *ci)  << 
441 {                                                 252 {
442         if (ci)                                !! 253         info->flags = CLUSTER_FLAG_NEXT_NULL;
443                 unlock_cluster(ci);            !! 254         info->data = 0;
444         else                                   << 
445                 spin_unlock(&si->lock);        << 
446 }                                                 255 }
447                                                   256 
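lock_cluster_or_swap_info() picks a lock granularity at runtime: the per-cluster lock when cluster_info exists (the SSD path), otherwise the coarse si->lock, and the unlock side must mirror exactly the choice made on the lock side. A userspace model of the pattern with hypothetical types, not the kernel API:

    #include <pthread.h>
    #include <stdio.h>
    #include <stddef.h>

    struct cluster { pthread_mutex_t lock; };

    struct device {
            pthread_mutex_t lock;      /* coarse, device-wide (rotational case) */
            struct cluster *clusters;  /* non-NULL only when fine-grained locks exist */
    };

    /* Take the per-cluster lock when available; otherwise fall back to
     * the device lock. Returns the cluster taken (NULL means "fell back"). */
    static struct cluster *lock_cluster_or_dev(struct device *d, size_t idx)
    {
            if (d->clusters) {
                    pthread_mutex_lock(&d->clusters[idx].lock);
                    return &d->clusters[idx];
            }
            pthread_mutex_lock(&d->lock);
            return NULL;
    }

    /* The unlock side keys off the same return value, so the pairing
     * can never mismatch. */
    static void unlock_cluster_or_dev(struct device *d, struct cluster *c)
    {
            if (c)
                    pthread_mutex_unlock(&c->lock);
            else
                    pthread_mutex_unlock(&d->lock);
    }

    int main(void)
    {
            struct cluster cl = { PTHREAD_MUTEX_INITIALIZER };
            struct device ssd = { PTHREAD_MUTEX_INITIALIZER, &cl };
            struct cluster *held = lock_cluster_or_dev(&ssd, 0);

            /* ... critical section ... */
            unlock_cluster_or_dev(&ssd, held);
            puts("ok");
            return 0;
    }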
448 /* Add a cluster to discard list and schedule     257 /* Add a cluster to discard list and schedule it to do discard */
449 static void swap_cluster_schedule_discard(stru    258 static void swap_cluster_schedule_discard(struct swap_info_struct *si,
450                 struct swap_cluster_info *ci)  !! 259                 unsigned int idx)
451 {                                                 260 {
452         unsigned int idx = cluster_index(si, ci);  << 
453         /*                                        261         /*
454          * If scan_swap_map_slots() can't find a free cluster, it will check      !! 262          * If scan_swap_map() can't find a free cluster, it will check
455          * si->swap_map directly. To make sure the discarding cluster isn't          263          * si->swap_map directly. To make sure the discarding cluster isn't
456          * taken by scan_swap_map_slots(), mark the swap entries bad (occupied).  !! 264          * taken by scan_swap_map(), mark the swap entries bad (occupied). It
457          * It will be cleared after discard                                       !! 265          * will be cleared after discard
458          */                                       266          */
459         memset(si->swap_map + idx * SWAPFILE_C    267         memset(si->swap_map + idx * SWAPFILE_CLUSTER,
460                         SWAP_MAP_BAD, SWAPFILE    268                         SWAP_MAP_BAD, SWAPFILE_CLUSTER);
461                                                   269 
462         VM_BUG_ON(ci->flags & CLUSTER_FLAG_FREE);          !! 270         if (cluster_is_null(&si->discard_cluster_head)) {
463         list_move_tail(&ci->list, &si->discard_clusters);  !! 271                 cluster_set_next_flag(&si->discard_cluster_head,
464         ci->flags = 0;                         !! 272                                                 idx, 0);
465         schedule_work(&si->discard_work);      !! 273                 cluster_set_next_flag(&si->discard_cluster_tail,
466 }                                              !! 274                                                 idx, 0);
467                                                !! 275         } else {
468 static void __free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci)  !! 276                 unsigned int tail = cluster_next(&si->discard_cluster_tail);
469 {                                              !! 277                 cluster_set_next(&si->cluster_info[tail], idx);
470         lockdep_assert_held(&si->lock);        !! 278                 cluster_set_next_flag(&si->discard_cluster_tail,
471         lockdep_assert_held(&ci->lock);        !! 279                                                 idx, 0);
                                                   >> 280         }
472                                                   281 
473         if (ci->flags)                         !! 282         schedule_work(&si->discard_work);
474                 list_move_tail(&ci->list, &si->free_clusters);  << 
475         else                                                    << 
476                 list_add_tail(&ci->list, &si->free_clusters);   << 
477         ci->flags = CLUSTER_FLAG_FREE;         << 
478         ci->order = 0;                         << 
479 }                                                 283 }
480                                                   284 
481 /*                                                285 /*
482  * Doing discard actually. After a cluster dis    286  * Doing discard actually. After a cluster discard is finished, the cluster
483  * will be added to free cluster list. caller     287  * will be added to free cluster list. caller should hold si->lock.
484 */                                                288 */
485 static void swap_do_scheduled_discard(struct s    289 static void swap_do_scheduled_discard(struct swap_info_struct *si)
486 {                                                 290 {
487         struct swap_cluster_info *ci;          !! 291         struct swap_cluster_info *info;
488         unsigned int idx;                         292         unsigned int idx;
489                                                   293 
490         while (!list_empty(&si->discard_clusters)) {                                    !! 294         info = si->cluster_info;
491                 ci = list_first_entry(&si->discard_clusters, struct swap_cluster_info, list);  !! 295 
492                 list_del(&ci->list);                                                    !! 296         while (!cluster_is_null(&si->discard_cluster_head)) {
493                 idx = cluster_index(si, ci);                                            !! 297                 idx = cluster_next(&si->discard_cluster_head);
                                                   >> 298 
                                                   >> 299                 cluster_set_next_flag(&si->discard_cluster_head,
                                                   >> 300                                                 cluster_next(&info[idx]), 0);
                                                   >> 301                 if (cluster_next(&si->discard_cluster_tail) == idx) {
                                                   >> 302                         cluster_set_null(&si->discard_cluster_head);
                                                   >> 303                         cluster_set_null(&si->discard_cluster_tail);
                                                   >> 304                 }
494                 spin_unlock(&si->lock);           305                 spin_unlock(&si->lock);
495                                                   306 
496                 discard_swap_cluster(si, idx *    307                 discard_swap_cluster(si, idx * SWAPFILE_CLUSTER,
497                                 SWAPFILE_CLUST    308                                 SWAPFILE_CLUSTER);
498                                                   309 
499                 spin_lock(&si->lock);             310                 spin_lock(&si->lock);
500                 spin_lock(&ci->lock);          !! 311                 cluster_set_flag(&info[idx], CLUSTER_FLAG_FREE);
501                 __free_cluster(si, ci);        !! 312                 if (cluster_is_null(&si->free_cluster_head)) {
                                                   >> 313                         cluster_set_next_flag(&si->free_cluster_head,
                                                   >> 314                                                 idx, 0);
                                                   >> 315                         cluster_set_next_flag(&si->free_cluster_tail,
                                                   >> 316                                                 idx, 0);
                                                   >> 317                 } else {
                                                   >> 318                         unsigned int tail;
                                                   >> 319 
                                                   >> 320                         tail = cluster_next(&si->free_cluster_tail);
                                                   >> 321                         cluster_set_next(&info[tail], idx);
                                                   >> 322                         cluster_set_next_flag(&si->free_cluster_tail,
                                                   >> 323                                                 idx, 0);
                                                   >> 324                 }
502                 memset(si->swap_map + idx * SW    325                 memset(si->swap_map + idx * SWAPFILE_CLUSTER,
503                                 0, SWAPFILE_CL    326                                 0, SWAPFILE_CLUSTER);
504                 spin_unlock(&ci->lock);        << 
505         }                                         327         }
506 }                                                 328 }
507                                                   329 
508 static void swap_discard_work(struct work_stru    330 static void swap_discard_work(struct work_struct *work)
509 {                                                 331 {
510         struct swap_info_struct *si;              332         struct swap_info_struct *si;
511                                                   333 
512         si = container_of(work, struct swap_in    334         si = container_of(work, struct swap_info_struct, discard_work);
513                                                   335 
514         spin_lock(&si->lock);                     336         spin_lock(&si->lock);
515         swap_do_scheduled_discard(si);            337         swap_do_scheduled_discard(si);
516         spin_unlock(&si->lock);                   338         spin_unlock(&si->lock);
517 }                                                 339 }
518                                                   340 
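swap_discard_work() above uses the container_of() idiom: the workqueue hands the callback a pointer to the embedded work_struct, and the callback recovers the enclosing swap_info_struct from it. The idiom itself is plain offset arithmetic, shown here standalone with hypothetical types:

    #include <stdio.h>
    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct work { int pending; };

    struct device {
            int id;
            struct work discard_work;   /* embedded member */
    };

    /* The callback receives only the inner member pointer... */
    static void discard_cb(struct work *w)
    {
            /* ...and recovers the enclosing object by subtracting the
             * member's offset from the member's address. */
            struct device *d = container_of(w, struct device, discard_work);
            printf("discarding on device %d\n", d->id);
    }

    int main(void)
    {
            struct device d = { .id = 7 };
            discard_cb(&d.discard_work);
            return 0;
    }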
519 static void swap_users_ref_free(struct percpu_ << 
520 {                                              << 
521         struct swap_info_struct *si;           << 
522                                                << 
523         si = container_of(ref, struct swap_info_struct, users);  << 
524         complete(&si->comp);                   << 
525 }                                              << 
526                                                << 
527 static void free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci)  << 
528 {                                              << 
529         VM_BUG_ON(ci->count != 0);             << 
530         lockdep_assert_held(&si->lock);        << 
531         lockdep_assert_held(&ci->lock);        << 
532                                                << 
533         if (ci->flags & CLUSTER_FLAG_FRAG)     << 
534                 si->frag_cluster_nr[ci->order]--;  << 
535                                                << 
536         /*                                     << 
537          * If the swap is discardable, prepare discard the cluster    << 
538          * instead of free it immediately. The cluster will be freed  << 
539          * after discard.                                             << 
540          */                                                           << 
541         if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==         << 
542             (SWP_WRITEOK | SWP_PAGE_DISCARD)) {                       << 
543                 swap_cluster_schedule_discard(si, ci);                << 
544                 return;                        << 
545         }                                      << 
546                                                << 
547         __free_cluster(si, ci);                << 
548 }                                              << 
549                                                << 
550 /*                                                341 /*
551  * The cluster corresponding to page_nr will be used. The cluster will not be  !! 342  * The cluster corresponding to page_nr will be used. The cluster will be
552  * added to free cluster list and its usage counter will be increased by 1.    !! 343  * removed from free cluster list and its usage counter will be increased.
553  * Only used for initialization.               << 
554  */                                               344  */
555 static void inc_cluster_info_page(struct swap_info_struct *si,  !! 345 static void inc_cluster_info_page(struct swap_info_struct *p,
556         struct swap_cluster_info *cluster_info    346         struct swap_cluster_info *cluster_info, unsigned long page_nr)
557 {                                                 347 {
558         unsigned long idx = page_nr / SWAPFILE    348         unsigned long idx = page_nr / SWAPFILE_CLUSTER;
559         struct swap_cluster_info *ci;          << 
560                                                   349 
561         if (!cluster_info)                        350         if (!cluster_info)
562                 return;                           351                 return;
                                                   >> 352         if (cluster_is_free(&cluster_info[idx])) {
                                                   >> 353                 VM_BUG_ON(cluster_next(&p->free_cluster_head) != idx);
                                                   >> 354                 cluster_set_next_flag(&p->free_cluster_head,
                                                   >> 355                         cluster_next(&cluster_info[idx]), 0);
                                                   >> 356                 if (cluster_next(&p->free_cluster_tail) == idx) {
                                                   >> 357                         cluster_set_null(&p->free_cluster_tail);
                                                   >> 358                         cluster_set_null(&p->free_cluster_head);
                                                   >> 359                 }
                                                   >> 360                 cluster_set_count_flag(&cluster_info[idx], 0, 0);
                                                   >> 361         }
563                                                   362 
564         ci = cluster_info + idx;               !! 363         VM_BUG_ON(cluster_count(&cluster_info[idx]) >= SWAPFILE_CLUSTER);
565         ci->count++;                           !! 364         cluster_set_count(&cluster_info[idx],
566                                                !! 365                 cluster_count(&cluster_info[idx]) + 1);
567         VM_BUG_ON(ci->count > SWAPFILE_CLUSTER);  << 
568         VM_BUG_ON(ci->flags);                  << 
569 }                                                 366 }
570                                                   367 
571 /*                                                368 /*
572  * The cluster ci decreases @nr_pages usage. If the usage counter becomes 0,  !! 369  * The cluster corresponding to page_nr decreases one usage. If the usage
573  * which means no page in the cluster is in use, we can optionally discard    !! 370  * counter becomes 0, which means no page in the cluster is in using, we can
574  * the cluster and add it to free cluster list.                               !! 371  * optionally discard the cluster and add it to free cluster list.
575  */                                               372  */
576 static void dec_cluster_info_page(struct swap_info_struct *si,                 !! 373 static void dec_cluster_info_page(struct swap_info_struct *p,
577                                   struct swap_cluster_info *ci, int nr_pages) !! 374         struct swap_cluster_info *cluster_info, unsigned long page_nr)
578 {                                                 375 {
579         if (!si->cluster_info)                 !! 376         unsigned long idx = page_nr / SWAPFILE_CLUSTER;
580                 return;                        << 
581                                                << 
582         VM_BUG_ON(ci->count < nr_pages);       << 
583         VM_BUG_ON(cluster_is_free(ci));        << 
584         lockdep_assert_held(&si->lock);        << 
585         lockdep_assert_held(&ci->lock);        << 
586         ci->count -= nr_pages;                 << 
587                                                   377 
588         if (!ci->count) {                      !! 378         if (!cluster_info)
589                 free_cluster(si, ci);          << 
590                 return;                           379                 return;
591         }                                      << 
592                                                   380 
593         if (!(ci->flags & CLUSTER_FLAG_NONFULL)) {                            !! 381         VM_BUG_ON(cluster_count(&cluster_info[idx]) == 0);
594                 VM_BUG_ON(ci->flags & CLUSTER_FLAG_FREE);                     !! 382         cluster_set_count(&cluster_info[idx],
595                 if (ci->flags & CLUSTER_FLAG_FRAG)                            !! 383                 cluster_count(&cluster_info[idx]) - 1);
596                         si->frag_cluster_nr[ci->order]--;                     !! 384 
597                 list_move_tail(&ci->list, &si->nonfull_clusters[ci->order]); !! 385         if (cluster_count(&cluster_info[idx]) == 0) {
598                 ci->flags = CLUSTER_FLAG_NONFULL;                             !! 386                 /*
599         }                                      !! 387                  * If the swap is discardable, prepare discard the cluster
600 }                                              !! 388                  * instead of free it immediately. The cluster will be freed
601                                                !! 389                  * after discard.
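inc_cluster_info_page()/dec_cluster_info_page() above maintain a per-cluster count of used slots, and on the dec side the cluster migrates toward the nonfull/free lists as the count drops. A compact model of just the counting policy (no lists; names hypothetical):

    #include <stdio.h>

    #define SWAPFILE_CLUSTER 256

    struct cluster { unsigned int count; };

    /* The state a cluster should be filed under after a count change:
     * "free" when empty, "full" when every slot is used, else "nonfull". */
    static const char *cluster_state(const struct cluster *c)
    {
            if (c->count == 0)
                    return "free";
            if (c->count == SWAPFILE_CLUSTER)
                    return "full";
            return "nonfull";
    }

    int main(void)
    {
            struct cluster c = { .count = 0 };

            c.count += 1;                   /* like inc_cluster_info_page() */
            printf("%u -> %s\n", c.count, cluster_state(&c));

            c.count = SWAPFILE_CLUSTER;     /* cluster fully allocated */
            printf("%u -> %s\n", c.count, cluster_state(&c));

            c.count -= 4;                   /* like dec by nr_pages */
            printf("%u -> %s\n", c.count, cluster_state(&c));
            return 0;
    }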
602 static bool cluster_reclaim_range(struct swap_info_struct *si,             << 
603                                   struct swap_cluster_info *ci,            << 
604                                   unsigned long start, unsigned long end)  << 
605 {                                              !! 393                         swap_cluster_schedule_discard(p, idx);
606         unsigned char *map = si->swap_map;     !! 394                         return;
607         unsigned long offset;                  << 
608                                                << 
609         spin_unlock(&ci->lock);                << 
610         spin_unlock(&si->lock);                << 
611                                                << 
612         for (offset = start; offset < end; offset++) {  << 
613                 switch (READ_ONCE(map[offset])) {       << 
614                 case 0:                        << 
615                         continue;              << 
616                 case SWAP_HAS_CACHE:           << 
617                         if (__try_to_reclaim_swap(si, offset, TTRS_ANYWAY | TTRS_DIRECT) > 0)  << 
618                                 continue;      << 
619                         goto out;              << 
620                 default:                       << 
621                         goto out;              << 
622                 }                              << 
623         }                                      << 
624 out:                                           << 
625         spin_lock(&si->lock);                  << 
626         spin_lock(&ci->lock);                  << 
627                                                << 
628         /*                                     << 
629          * Recheck the range no matter reclaim succeeded or not, the slot  << 
630          * could have been freed while we are not holding the lock.        << 
631          */                                    << 
632         for (offset = start; offset < end; offset++)  << 
633                 if (READ_ONCE(map[offset]))    << 
634                         return false;          << 
635                                                << 
636         return true;                           << 
637 }                                              << 
638                                                << 
639 static bool cluster_scan_range(struct swap_info_struct *si,                 << 
640                                struct swap_cluster_info *ci,                << 
641                                unsigned long start, unsigned int nr_pages)  << 
642 {                                              << 
643         unsigned long offset, end = start + nr_pages;  << 
644         unsigned char *map = si->swap_map;     << 
645         bool need_reclaim = false;             << 
646                                                << 
647         for (offset = start; offset < end; offset++) {  << 
648                 switch (READ_ONCE(map[offset])) {       << 
649                 case 0:                        << 
650                         continue;              << 
651                 case SWAP_HAS_CACHE:           << 
652                         if (!vm_swap_full())   << 
653                                 return false;  << 
654                         need_reclaim = true;   << 
655                         continue;              << 
656                 default:                       << 
657                         return false;          << 
658                 }                              << 
659         }                                      << 
660                                                << 
661         if (need_reclaim)                      << 
662                 return cluster_reclaim_range(si, ci, start, end);  << 
663                                                << 
664         return true;                           << 
665 }                                              << 
666                                                << 
667 static void cluster_alloc_range(struct swap_info_struct *si, struct swap_cluster_info *ci,  << 
668                                 unsigned int start, unsigned char usage,                    << 
669                                 unsigned int order)                                         << 
670 {                                              << 
671         unsigned int nr_pages = 1 << order;    << 
672                                                << 
673         if (cluster_is_free(ci)) {             << 
674                 if (nr_pages < SWAPFILE_CLUSTER) {                                << 
675                         list_move_tail(&ci->list, &si->nonfull_clusters[order]);  << 
676                         ci->flags = CLUSTER_FLAG_NONFULL;                         << 
677                 }                                 395                 }
678                 ci->order = order;             << 
679         }                                      << 
680                                                << 
681         memset(si->swap_map + start, usage, nr << 
682         swap_range_alloc(si, start, nr_pages); << 
683         ci->count += nr_pages;                 << 
684                                                << 
685         if (ci->count == SWAPFILE_CLUSTER) {   << 
686                 VM_BUG_ON(!(ci->flags &        << 
687                           (CLUSTER_FLAG_FREE | << 
688                 if (ci->flags & CLUSTER_FLAG_F << 
689                         si->frag_cluster_nr[ci << 
690                 list_move_tail(&ci->list, &si- << 
691                 ci->flags = CLUSTER_FLAG_FULL; << 
692         }                                      << 
693 }                                              << 
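/*
 * [Editor's illustrative sketch -- not part of mm/swapfile.c]
 * cluster_alloc_range() above moves a cluster between the per-device lists
 * as its allocated count changes: a free cluster that is only partially
 * filled goes to a nonfull list, and a cluster whose count reaches
 * SWAPFILE_CLUSTER goes to the full list. The state transitions in
 * isolation, assuming a 512-entry cluster; demo_* names are hypothetical:
 */
enum demo_state { DEMO_FREE, DEMO_NONFULL, DEMO_FULL };

#define DEMO_CLUSTER_SIZE 512           /* stands in for SWAPFILE_CLUSTER */

struct demo_cluster {
        unsigned int count;             /* slots currently allocated */
        enum demo_state state;          /* which list the cluster sits on */
};

static void demo_cluster_alloc(struct demo_cluster *c, unsigned int nr)
{
        if (c->state == DEMO_FREE && nr < DEMO_CLUSTER_SIZE)
                c->state = DEMO_NONFULL;        /* partially used */
        c->count += nr;
        if (c->count == DEMO_CLUSTER_SIZE)
                c->state = DEMO_FULL;           /* exhausted */
}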
694                                                << 
 695 static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si, unsigned long offset, <<
 696                                             unsigned int *foundp, unsigned int order, <<
 697                                             unsigned char usage)       <<
 698 {                                                                      <<
 699         unsigned long start = offset & ~(SWAPFILE_CLUSTER - 1);        <<
 700         unsigned long end = min(start + SWAPFILE_CLUSTER, si->max);    <<
 701         unsigned int nr_pages = 1 << order;                            <<
 702         struct swap_cluster_info *ci;                                  <<
 703                                                                           396
 704         if (end < nr_pages)                                            !! 397                 cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
 705                 return SWAP_NEXT_INVALID;                              !! 398                 if (cluster_is_null(&p->free_cluster_head)) {
 706         end -= nr_pages;                                               !! 399                         cluster_set_next_flag(&p->free_cluster_head, idx, 0);
 707                                                                        !! 400                         cluster_set_next_flag(&p->free_cluster_tail, idx, 0);
 708         ci = lock_cluster(si, offset);                                 !! 401                 } else {
 709         if (ci->count + nr_pages > SWAPFILE_CLUSTER) {                 !! 402                         unsigned int tail = cluster_next(&p->free_cluster_tail);
 710                 offset = SWAP_NEXT_INVALID;                            !! 403                         cluster_set_next(&cluster_info[tail], idx);
 711                 goto done;                                             !! 404                         cluster_set_next_flag(&p->free_cluster_tail, idx, 0);
 712         }                                                              <<
 713                                                                        <<
 714         while (offset <= end) {                                        <<
 715                 if (cluster_scan_range(si, ci, offset, nr_pages)) {    <<
 716                         cluster_alloc_range(si, ci, offset, usage, order); <<
 717                         *foundp = offset;                              <<
 718                         if (ci->count == SWAPFILE_CLUSTER) {           <<
 719                                 offset = SWAP_NEXT_INVALID;            <<
 720                                 goto done;                             <<
 721                         }                                              <<
 722                         offset += nr_pages;                            <<
 723                         break;                                         <<
 724                 }                                                         405                 }
 725                 offset += nr_pages;                                    <<
 726         }                                                                 406         }
 727         if (offset > end)                                              <<
 728                 offset = SWAP_NEXT_INVALID;                            <<
 729 done:                                                                  <<
 730         unlock_cluster(ci);                                            <<
 731         return offset;                                                 <<
 732 }                                                                         407 }
733                                                   408 
 734 /* Scan full clusters and reclaim cache-only entries, freeing whole clusters where possible */ !! 409 /*
 735 static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force) !! 410  * It's possible scan_swap_map() uses a free cluster in the middle of free
                                                                           >> 411  * cluster list. Avoiding such abuse to avoid list corruption.
                                                                           >> 412  */
                                                                           >> 413 static bool
                                                                           >> 414 scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
                                                                           >> 415         unsigned long offset)
 736 {                                                                         416 {
 737         long to_scan = 1;                                              !! 417         struct percpu_cluster *percpu_cluster;
 738         unsigned long offset, end;                                     !! 418         bool conflict;
 739         struct swap_cluster_info *ci;                                  <<
 740         unsigned char *map = si->swap_map;                             <<
 741         int nr_reclaim;                                                <<
 742                                                                        <<
 743         if (force)                                                     <<
 744                 to_scan = si->inuse_pages / SWAPFILE_CLUSTER;          <<
 745                                                                        <<
 746         while (!list_empty(&si->full_clusters)) {                      <<
 747                 ci = list_first_entry(&si->full_clusters, struct swap_cluster_info, list); <<
 748                 list_move_tail(&ci->list, &si->full_clusters);         <<
 749                 offset = cluster_offset(si, ci);                       <<
 750                 end = min(si->max, offset + SWAPFILE_CLUSTER);         <<
 751                 to_scan--;                                             <<
 752                                                                        <<
 753                 spin_unlock(&si->lock);                                <<
 754                 while (offset < end) {                                 <<
 755                         if (READ_ONCE(map[offset]) == SWAP_HAS_CACHE) { <<
 756                                 nr_reclaim = __try_to_reclaim_swap(si, offset, <<
 757                                                                    TTRS_ANYWAY | TTRS_DIRECT); <<
 758                                 if (nr_reclaim) {                      <<
 759                                         offset += abs(nr_reclaim);     <<
 760                                         continue;                      <<
 761                                 }                                      <<
 762                         }                                              <<
 763                         offset++;                                      <<
 764                 }                                                      <<
 765                 spin_lock(&si->lock);                                  <<
 766                                                                           419
 767                 if (to_scan <= 0)                                      !! 420         offset /= SWAPFILE_CLUSTER;
 768                         break;                                         !! 421         conflict = !cluster_is_null(&si->free_cluster_head) &&
 769         }                                                              !! 422                 offset != cluster_next(&si->free_cluster_head) &&
 770 }                                                                      !! 423                 cluster_is_free(&si->cluster_info[offset]);
 771                                                                        <<
 772 static void swap_reclaim_work(struct work_struct *work)                <<
 773 {                                                                      <<
 774         struct swap_info_struct *si;                                   <<
 775                                                                           424
 776         si = container_of(work, struct swap_info_struct, reclaim_work); !! 425         if (!conflict)
                                                                           >> 426                 return false;
 777                                                                           427
 778         spin_lock(&si->lock);                                          !! 428         percpu_cluster = this_cpu_ptr(si->percpu_cluster);
 779         swap_reclaim_full_clusters(si, true);                          !! 429         cluster_set_null(&percpu_cluster->index);
 780         spin_unlock(&si->lock);                                        !! 430         return true;
 781 }                                                                         431 }
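/*
 * [Editor's illustrative sketch -- not part of mm/swapfile.c]
 * swap_reclaim_work is an ordinary workqueue item: swap_range_alloc()
 * schedules it when the device fills up, so the expensive full-cluster
 * scan runs later in process context instead of in the allocation path.
 * The general deferral pattern, with hypothetical demo_* names:
 */
#include <linux/workqueue.h>

struct demo_dev {
        struct work_struct reclaim_work;
};

static void demo_reclaim_fn(struct work_struct *work)
{
        struct demo_dev *dev = container_of(work, struct demo_dev, reclaim_work);

        /* the heavy scan runs here, outside the hot allocation path */
        (void)dev;
}

static void demo_dev_init(struct demo_dev *dev)
{
        INIT_WORK(&dev->reclaim_work, demo_reclaim_fn);
}

static void demo_dev_kick(struct demo_dev *dev)
{
        schedule_work(&dev->reclaim_work);      /* cheap enough for hot paths */
}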
782                                                   432 
 783 /*                                                                        433 /*
 784  * Try to get swap entries with the specified order from the current CPU's !! 434  * Try to get a swap entry from current cpu's swap entry pool (a cluster). This
 785  * swap entry pool (a cluster). This might involve allocating a new cluster !! 435  * might involve allocating a new cluster for current CPU too.
 786  * for the current CPU too.                                            <<
 787  */                                                                       436  */
 788 static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int order, !! 437 static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
 789                                               unsigned char usage)     !! 438         unsigned long *offset, unsigned long *scan_base)
 790 {                                                                         439 {
 791         struct percpu_cluster *cluster;                                   440         struct percpu_cluster *cluster;
 792         struct swap_cluster_info *ci;                                  !! 441         bool found_free;
 793         unsigned int offset, found = 0;                                !! 442         unsigned long tmp;
 794                                                                           443
 795 new_cluster:                                                              444 new_cluster:
 796         lockdep_assert_held(&si->lock);                                <<
 797         cluster = this_cpu_ptr(si->percpu_cluster);                       445         cluster = this_cpu_ptr(si->percpu_cluster);
 798         offset = cluster->next[order];                                 !! 446         if (cluster_is_null(&cluster->index)) {
 799         if (offset) {                                                  !! 447                 if (!cluster_is_null(&si->free_cluster_head)) {
 800                 offset = alloc_swap_scan_cluster(si, offset, &found, order, usage); !! 448                         cluster->index = si->free_cluster_head;
 801                 if (found)                                             !! 449                         cluster->next = cluster_next(&cluster->index) *
 802                         goto done;                                     !! 450                                         SWAPFILE_CLUSTER;
 803         }                                                              !! 451                 } else if (!cluster_is_null(&si->discard_cluster_head)) {
 804                                                                        <<
 805         if (!list_empty(&si->free_clusters)) {                         <<
 806                 ci = list_first_entry(&si->free_clusters, struct swap_cluster_info, list); <<
 807                 offset = alloc_swap_scan_cluster(si, cluster_offset(si, ci), &found, order, usage); <<
 808                 VM_BUG_ON(!found);                                     <<
 809                 goto done;                                             <<
 810         }                                                              <<
 811                                                                        <<
 812         /* Try reclaim from full clusters if free clusters list is drained */ <<
 813         if (vm_swap_full())                                            <<
 814                 swap_reclaim_full_clusters(si, false);                 <<
 815                                                                        <<
 816         if (order < PMD_ORDER) {                                       <<
 817                 unsigned int frags = 0;                                <<
 818                                                                        <<
 819                 while (!list_empty(&si->nonfull_clusters[order])) {    <<
 820                         ci = list_first_entry(&si->nonfull_clusters[order], <<
 821                                               struct swap_cluster_info, list); <<
 822                         list_move_tail(&ci->list, &si->frag_clusters[order]); <<
 823                         ci->flags = CLUSTER_FLAG_FRAG;                 <<
 824                         si->frag_cluster_nr[order]++;                  <<
 825                         offset = alloc_swap_scan_cluster(si, cluster_offset(si, ci), <<
 826                                                          &found, order, usage); <<
 827                         frags++;                                       <<
 828                         if (found)                                     <<
 829                                 break;                                 <<
 830                 }                                                      <<
 831                                                                        <<
 832                 if (!found) {                                          <<
 833                         /*                                                452                         /*
 834                          * Nonfull clusters were all moved to the frag list !! 453                          * we don't have free cluster but have some clusters in
 835                          * here; count them too so we don't over-scan it. !! 454                          * discarding, do discard now and reclaim them
 836                          */                                               455                          */
 837                         while (frags < si->frag_cluster_nr[order]) {   !! 456                         swap_do_scheduled_discard(si);
 838                                 ci = list_first_entry(&si->frag_clusters[order], !! 457                         *scan_base = *offset = si->cluster_next;
 839                                                       struct swap_cluster_info, list); !! 458                         goto new_cluster;
 840                                 /*                                     !! 459                 } else
 841                                  * Rotate the frag list to iterate; these clusters were all !! 460                         return;
 842                                  * failing high-order allocation or were moved here due to <<
 843                                  * per-CPU usage, so rotation helps keep usable clusters ahead. <<
 844                                  */                                    <<
 845                                 list_move_tail(&ci->list, &si->frag_clusters[order]); <<
 846                                 offset = alloc_swap_scan_cluster(si, cluster_offset(si, ci), <<
 847                                                                  &found, order, usage); <<
 848                                 frags++;                               <<
 849                                 if (found)                             <<
 850                                         break;                         <<
 851                         }                                              <<
 852                 }                                                      <<
 853         }                                                              <<
 854                                                                        <<
 855         if (found)                                                     <<
 856                 goto done;                                             <<
 857                                                                        <<
 858         if (!list_empty(&si->discard_clusters)) {                      <<
 859                 /*                                                     <<
 860                  * We don't have a free cluster, but some clusters are being <<
 861                  * discarded; do the discard now and reclaim them, then <<
 862                  * reread cluster_next_cpu since we dropped si->lock.  <<
 863                  */                                                    <<
 864                 swap_do_scheduled_discard(si);                         <<
 865                 goto new_cluster;                                      <<
 866         }                                                              <<
 867                                                                        <<
 868         if (order)                                                     <<
 869                 goto done;                                             <<
 870                                                                        <<
 871         /* Order 0 stealing from higher order clusters */              <<
 872         for (int o = 1; o < SWAP_NR_ORDERS; o++) {                     <<
 873                 /*                                                     <<
 874                  * Clusters here have at least one usable slot and can't fail order 0 <<
 875                  * allocation, but reclaim may drop si->lock and race with another user. <<
 876                  */                                                    <<
 877                 while (!list_empty(&si->frag_clusters[o])) {           <<
 878                         ci = list_first_entry(&si->frag_clusters[o],   <<
 879                                               struct swap_cluster_info, list); <<
 880                         offset = alloc_swap_scan_cluster(si, cluster_offset(si, ci), <<
 881                                                          &found, 0, usage); <<
 882                         if (found)                                     <<
 883                                 goto done;                             <<
 884                 }                                                      <<
 885                                                                        <<
 886                 while (!list_empty(&si->nonfull_clusters[o])) {        <<
 887                         ci = list_first_entry(&si->nonfull_clusters[o], <<
 888                                               struct swap_cluster_info, list); <<
 889                         offset = alloc_swap_scan_cluster(si, cluster_offset(si, ci), <<
 890                                                          &found, 0, usage); <<
 891                         if (found)                                     <<
 892                                 goto done;                             <<
 893                 }                                                      <<
 894         }                                                              <<
 895                                                                        <<
 896 done:                                                                  <<
 897         cluster->next[order] = offset;                                 <<
 898         return found;                                                  <<
 899 }                                                                      <<
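/*
 * [Editor's illustrative sketch -- not part of mm/swapfile.c] Reduced to
 * its control flow, cluster_alloc_swap_entry() above tries its sources in
 * a fixed order: the per-CPU next hint, the free list, the nonfull list
 * (demoting scanned clusters to the frag list), the frag list itself,
 * clusters freed by a pending discard, and, for order-0 only, stealing
 * from higher-order lists. demo_try() is a hypothetical stand-in for
 * alloc_swap_scan_cluster():
 */
static unsigned long demo_try(const char *source)
{
        (void)source;
        return 0;       /* pretend every source is exhausted */
}

static unsigned long demo_alloc_entry(int order)
{
        unsigned long found;

        if ((found = demo_try("percpu-next-hint")))
                return found;
        if ((found = demo_try("free-list")))
                return found;
        if ((found = demo_try("nonfull-list")))         /* demotes to frag */
                return found;
        if ((found = demo_try("frag-list")))            /* rotated, bounded scan */
                return found;
        if ((found = demo_try("after-discard")))        /* swap_do_scheduled_discard() */
                return found;
        if (order == 0 && (found = demo_try("steal-higher-order")))
                return found;
        return 0;
}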
900                                                << 
 901 static void __del_from_avail_list(struct swap_info_struct *si)         <<
 902 {                                                                      <<
 903         int nid;                                                       <<
 904                                                                        <<
 905         assert_spin_locked(&si->lock);                                 <<
 906         for_each_node(nid)                                             <<
 907                 plist_del(&si->avail_lists[nid], &swap_avail_heads[nid]); <<
 908 }                                                                      <<
 909                                                                        <<
 910 static void del_from_avail_list(struct swap_info_struct *si)           <<
 911 {                                                                      <<
 912         spin_lock(&swap_avail_lock);                                   <<
 913         __del_from_avail_list(si);                                     <<
 914         spin_unlock(&swap_avail_lock);                                 <<
 915 }                                                                      <<
 916                                                                        <<
 917 static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset, <<
 918                              unsigned int nr_entries)                  <<
 919 {                                                                      <<
 920         unsigned int end = offset + nr_entries - 1;                    <<
 921                                                                        <<
 922         if (offset == si->lowest_bit)                                  <<
 923                 si->lowest_bit += nr_entries;                          <<
 924         if (end == si->highest_bit)                                    <<
 925                 WRITE_ONCE(si->highest_bit, si->highest_bit - nr_entries); <<
 926         WRITE_ONCE(si->inuse_pages, si->inuse_pages + nr_entries);     <<
 927         if (si->inuse_pages == si->pages) {                            <<
 928                 si->lowest_bit = si->max;                              <<
 929                 si->highest_bit = 0;                                   <<
 930                 del_from_avail_list(si);                               <<
 931                                                                        <<
 932                 if (vm_swap_full())                                    <<
 933                         schedule_work(&si->reclaim_work);              <<
 934         }                                                              <<
 935 }                                                                      <<
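/*
 * [Editor's illustrative sketch -- not part of mm/swapfile.c]
 * swap_range_alloc() above (and swap_range_free() below) maintain
 * si->lowest_bit/si->highest_bit as a cheap window bracketing where free
 * slots can possibly exist, so scans never need to walk the whole map.
 * The bookkeeping in isolation; demo_* names are hypothetical:
 */
struct demo_si {
        unsigned long lowest_bit;       /* no free slot below this offset */
        unsigned long highest_bit;      /* no free slot above this offset */
};

static void demo_range_alloc(struct demo_si *si, unsigned long off, unsigned int nr)
{
        if (off == si->lowest_bit)
                si->lowest_bit += nr;           /* window shrinks from below */
        if (off + nr - 1 == si->highest_bit)
                si->highest_bit -= nr;          /* window shrinks from above */
}

static void demo_range_free(struct demo_si *si, unsigned long off, unsigned int nr)
{
        if (off < si->lowest_bit)
                si->lowest_bit = off;           /* window grows back down */
        if (off + nr - 1 > si->highest_bit)
                si->highest_bit = off + nr - 1; /* window grows back up */
}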
936                                                << 
 937 static void add_to_avail_list(struct swap_info_struct *si)             <<
 938 {                                                                      <<
 939         int nid;                                                       <<
 940                                                                        <<
 941         spin_lock(&swap_avail_lock);                                   <<
 942         for_each_node(nid)                                             <<
 943                 plist_add(&si->avail_lists[nid], &swap_avail_heads[nid]); <<
 944         spin_unlock(&swap_avail_lock);                                 <<
 945 }                                                                      <<
946                                                << 
 947 static void swap_range_free(struct swap_info_struct *si, unsigned long offset, <<
 948                             unsigned int nr_entries)                   <<
 949 {                                                                      <<
 950         unsigned long begin = offset;                                  <<
 951         unsigned long end = offset + nr_entries - 1;                   <<
 952         void (*swap_slot_free_notify)(struct block_device *, unsigned long); <<
 953         unsigned int i;                                                <<
 954                                                                        <<
 955         /*                                                             <<
 956          * Use atomic clear_bit operations only on zeromap instead of non-atomic <<
 957          * bitmap_clear to prevent adjacent bits corruption due to simultaneous writes. <<
 958          */                                                            <<
 959         for (i = 0; i < nr_entries; i++)                               <<
 960                 clear_bit(offset + i, si->zeromap);                    <<
 961                                                                        <<
 962         if (offset < si->lowest_bit)                                   <<
 963                 si->lowest_bit = offset;                               <<
 964         if (end > si->highest_bit) {                                   <<
 965                 bool was_full = !si->highest_bit;                      <<
 966                                                                        <<
 967                 WRITE_ONCE(si->highest_bit, end);                      <<
 968                 if (was_full && (si->flags & SWP_WRITEOK))             <<
 969                         add_to_avail_list(si);                         <<
 970         }                                                              <<
 971         if (si->flags & SWP_BLKDEV)                                    <<
 972                 swap_slot_free_notify =                                <<
 973                         si->bdev->bd_disk->fops->swap_slot_free_notify; <<
 974         else                                                           <<
 975                 swap_slot_free_notify = NULL;                          <<
 976         while (offset <= end) {                                        <<
 977                 arch_swap_invalidate_page(si->type, offset);           <<
 978                 if (swap_slot_free_notify)                             <<
 979                         swap_slot_free_notify(si->bdev, offset);       <<
 980                 offset++;                                              <<
 981         }                                                                 461         }
 982         clear_shadow_from_swap_cache(si->type, begin, end);            <<
 983                                                                        <<
 984         /*                                                             <<
 985          * Make sure that try_to_unuse() observes si->inuse_pages reaching 0 <<
 986          * only after the above cleanups are done.                     <<
 987          */                                                            <<
 988         smp_wmb();                                                     <<
 989         atomic_long_add(nr_entries, &nr_swap_pages);                   <<
 990         WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);     <<
 991 }                                                                      <<
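/*
 * [Editor's illustrative sketch -- not part of mm/swapfile.c] The zeromap
 * loop above clears one bit at a time with the atomic clear_bit() rather
 * than the non-atomic bitmap_clear(): concurrent freeers working on
 * different offsets can share a word, and a plain read-modify-write of
 * that word could lose a neighbour's update. The same idea modelled with
 * C11 atomics; demo_* names are hypothetical:
 */
#include <stdatomic.h>

#define DEMO_BITS_PER_WORD 64

static _Atomic unsigned long long demo_zeromap[4];      /* 256 bits */

static void demo_clear_bit(unsigned long bit)
{
        /* an atomic AND keeps simultaneous writers to one word safe */
        atomic_fetch_and(&demo_zeromap[bit / DEMO_BITS_PER_WORD],
                         ~(1ULL << (bit % DEMO_BITS_PER_WORD)));
}

static void demo_range_clear(unsigned long off, unsigned int nr)
{
        unsigned int i;

        for (i = 0; i < nr; i++)
                demo_clear_bit(off + i);
}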
992                                                << 
 993 static void set_cluster_next(struct swap_info_struct *si, unsigned long next) <<
 994 {                                                                      <<
 995         unsigned long prev;                                            <<
 996                                                                           462
 997         if (!(si->flags & SWP_SOLIDSTATE)) {                           !! 463         found_free = false;
 998                 si->cluster_next = next;                               <<
 999                 return;                                                <<
1000         }                                                              <<
1001                                                                           464
1002         prev = this_cpu_read(*si->cluster_next_cpu);                   <<
1003         /*                                                                465         /*
1004          * Crossed a swap-address-space-size aligned trunk; choose     !! 466          * Other CPUs can use our cluster if they can't find a free cluster,
1005          * another trunk randomly to avoid lock contention on the swap !! 467          * check if there is still free entry in the cluster
1006          * address space if possible.                                  <<
1007          */                                                               468          */
1008         if ((prev >> SWAP_ADDRESS_SPACE_SHIFT) !=                      !! 469         tmp = cluster->next;
1009             (next >> SWAP_ADDRESS_SPACE_SHIFT)) {                      !! 470         while (tmp < si->max && tmp < (cluster_next(&cluster->index) + 1) *
1010                 /* No free swap slots available */                     !! 471                SWAPFILE_CLUSTER) {
1011                 if (si->highest_bit <= si->lowest_bit)                 !! 472                 if (!si->swap_map[tmp]) {
1012                         return;                                        !! 473                         found_free = true;
1013                 next = get_random_u32_inclusive(si->lowest_bit, si->highest_bit); <<
1014                 next = ALIGN_DOWN(next, SWAP_ADDRESS_SPACE_PAGES);     <<
1015                 next = max_t(unsigned int, next, si->lowest_bit);      <<
1016         }                                                              <<
1017         this_cpu_write(*si->cluster_next_cpu, next);                   <<
1018 }                                                                      <<
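/*
 * [Editor's illustrative sketch -- not part of mm/swapfile.c] On SSDs,
 * set_cluster_next() keeps a per-CPU hint, and once a CPU's allocations
 * cross into a new SWAP_ADDRESS_SPACE-sized trunk it re-seeds the hint at
 * a random trunk boundary so CPUs spread over the device and contend less
 * on the swap address space locks. The re-seeding arithmetic standalone;
 * the 64-page trunk size and the rand() helper are assumptions:
 */
#include <stdlib.h>

#define DEMO_TRUNK_PAGES 64UL   /* stands in for SWAP_ADDRESS_SPACE_PAGES */

static unsigned long demo_reseed(unsigned long lowest, unsigned long highest)
{
        unsigned long next;

        /* random offset in [lowest, highest], like get_random_u32_inclusive() */
        next = lowest + (unsigned long)rand() % (highest - lowest + 1);
        /* round down to a trunk boundary, like ALIGN_DOWN() */
        next &= ~(DEMO_TRUNK_PAGES - 1);
        /* never fall below the lowest usable slot */
        if (next < lowest)
                next = lowest;
        return next;
}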
1019                                               << 
1020 static bool swap_offset_available_and_locked(struct swap_info_struct *si, <<
1021                                              unsigned long offset)     <<
1022 {                                                                      <<
1023         if (data_race(!si->swap_map[offset])) {                        <<
1024                 spin_lock(&si->lock);                                  <<
1025                 return true;                                           <<
1026         }                                                              <<
1027                                                                        <<
1028         if (vm_swap_full() && READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { <<
1029                 spin_lock(&si->lock);                                  <<
1030                 return true;                                           <<
1031         }                                                              <<
1032                                                                        <<
1033         return false;                                                  <<
1034 }                                                                      <<
1035                                                                        <<
1036 static int cluster_alloc_swap(struct swap_info_struct *si,             <<
1037                              unsigned char usage, int nr,              <<
1038                              swp_entry_t slots[], int order)           <<
1039 {                                                                      <<
1040         int n_ret = 0;                                                 <<
1041                                                                        <<
1042         VM_BUG_ON(!si->cluster_info);                                  <<
1043                                                                        <<
1044         while (n_ret < nr) {                                           <<
1045                 unsigned long offset = cluster_alloc_swap_entry(si, order, usage); <<
1046                                                                        <<
1047                 if (!offset)                                           <<
1048                         break;                                            474                         break;
1049                 slots[n_ret++] = swp_entry(si->type, offset);          !! 475                 }
                                                                           >> 476                 tmp++;
1050         }                                                                 477         }
1051                                                                        !! 478         if (!found_free) {
1052         return n_ret;                                                  !! 479                 cluster_set_null(&cluster->index);
                                                                           >> 480                 goto new_cluster;
                                                                           >> 481         }
                                                                           >> 482         cluster->next = tmp + 1;
                                                                           >> 483         *offset = tmp;
                                                                           >> 484         *scan_base = tmp;
1053 }                                                                         485 }
1054                                                  486 
1055 static int scan_swap_map_slots(struct swap_info_struct *si,            !! 487 static unsigned long scan_swap_map(struct swap_info_struct *si,
1056                                unsigned char usage, int nr,            !! 488                                    unsigned char usage)
1057                                swp_entry_t slots[], int order)         <<
1058 {                                                489 {
1059         unsigned long offset;                    490         unsigned long offset;
1060         unsigned long scan_base;                 491         unsigned long scan_base;
1061         unsigned long last_in_cluster = 0;       492         unsigned long last_in_cluster = 0;
1062         int latency_ration = LATENCY_LIMIT;      493         int latency_ration = LATENCY_LIMIT;
1063         unsigned int nr_pages = 1 << order;   << 
1064         int n_ret = 0;                        << 
1065         bool scanned_many = false;            << 
1066                                                  494 
1067         /*                                       495         /*
1068          * We try to cluster swap pages by allocating them sequentially   496          * We try to cluster swap pages by allocating them sequentially
1069          * in swap.  Once we've allocated SWAPFILE_CLUSTER pages this     497          * in swap.  Once we've allocated SWAPFILE_CLUSTER pages this
1070          * way, however, we resort to first-free allocation, starting     498          * way, however, we resort to first-free allocation, starting
1071          * a new cluster.  This prevents us from scattering swap pages    499          * a new cluster.  This prevents us from scattering swap pages
1072          * all over the entire swap partition, so that we reduce          500          * all over the entire swap partition, so that we reduce
1073          * overall disk seek times between swap pages.  -- sct            501          * overall disk seek times between swap pages.  -- sct
1074          * But we do now try to find an empty cluster.  -Andrea           502          * But we do now try to find an empty cluster.  -Andrea
1075          * And we let swap pages go all over an SSD partition.  Hugh      503          * And we let swap pages go all over an SSD partition.  Hugh
1076          */                                      504          */
1077                                                  505 
1078         if (order > 0) {                      << 
1079                 /*                            << 
1080                  * Should not even be attempting large allocations when huge <<
1081                  * page swap is disabled.  Warn and fail the allocation. <<
1082                  */                                                    <<
1083                 if (!IS_ENABLED(CONFIG_THP_SWAP) ||                    <<
1084                     nr_pages > SWAPFILE_CLUSTER) {                     <<
1085                         VM_WARN_ON_ONCE(1);                            <<
1086                         return 0;                                      <<
1087                 }                                                      <<
1088                                                                        <<
1089                 /*                                                     <<
1090                  * Swapfile is not a block device or is not using clusters, <<
1091                  * so it is unable to allocate large entries.          <<
1092                  */                                                    <<
1093                 if (!(si->flags & SWP_BLKDEV) || !si->cluster_info)    <<
1094                         return 0;                                      <<
1095         }                                                              <<
1096                                                                        <<
1097         if (si->cluster_info)                                          <<
1098                 return cluster_alloc_swap(si, usage, nr, slots, order); <<
1099                                               << 
1100         si->flags += SWP_SCANNING;               506         si->flags += SWP_SCANNING;
                                                   >> 507         scan_base = offset = si->cluster_next;
1101                                                  508 
1102         /* For HDD, sequential access is more important. */           !! 509         /* SSD algorithm */
1103         scan_base = si->cluster_next;         !! 510         if (si->cluster_info) {
1104         offset = scan_base;                   !! 511                 scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
                                                   >> 512                 goto checks;
                                                   >> 513         }
1105                                                  514 
1106         if (unlikely(!si->cluster_nr--)) {       515         if (unlikely(!si->cluster_nr--)) {
1107                 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {     516                 if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
1108                         si->cluster_nr = SWAPFILE_CLUSTER - 1;            517                         si->cluster_nr = SWAPFILE_CLUSTER - 1;
1109                         goto checks;             518                         goto checks;
1110                 }                                519                 }
1111                                                  520 
1112                 spin_unlock(&si->lock);          521                 spin_unlock(&si->lock);
1113                                                  522 
1114                 /*                               523                 /*
1115                  * If seek is expensive, start searching for new cluster from    524                  * If seek is expensive, start searching for new cluster from
1116                  * start of partition, to minimize the span of allocated swap.   525                  * start of partition, to minimize the span of allocated swap.
                                                                           >> 526                  * If seek is cheap, that is the SWP_SOLIDSTATE si->cluster_info
                                                                           >> 527                  * case, just handled by scan_swap_map_try_ssd_cluster() above.
1117                  */                                                       528                  */
1118                 scan_base = offset = si->lowest_bit;                      529                 scan_base = offset = si->lowest_bit;
1119                 last_in_cluster = offset + SWAPFILE_CLUSTER - 1;          530                 last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
1120                                                                           531
1121                 /* Locate the first empty (unaligned) cluster */          532                 /* Locate the first empty (unaligned) cluster */
1122                 for (; last_in_cluster <= READ_ONCE(si->highest_bit); offset++) { !! 533                 for (; last_in_cluster <= si->highest_bit; offset++) {
1123                         if (si->swap_map[offset])                         534                         if (si->swap_map[offset])
1124                                 last_in_cluster = offset + SWAPFILE_CLUSTER; 535                                 last_in_cluster = offset + SWAPFILE_CLUSTER;
1125                         else if (offset == last_in_cluster) {             536                         else if (offset == last_in_cluster) {
1126                                 spin_lock(&si->lock);                     537                                 spin_lock(&si->lock);
1127                                 offset -= SWAPFILE_CLUSTER - 1;           538                                 offset -= SWAPFILE_CLUSTER - 1;
1128                                 si->cluster_next = offset;                539                                 si->cluster_next = offset;
1129                                 si->cluster_nr = SWAPFILE_CLUSTER - 1;    540                                 si->cluster_nr = SWAPFILE_CLUSTER - 1;
1130                                 goto checks;     541                                 goto checks;
1131                         }                        542                         }
1132                         if (unlikely(--latenc    543                         if (unlikely(--latency_ration < 0)) {
1133                                 cond_resched(    544                                 cond_resched();
1134                                 latency_ratio    545                                 latency_ration = LATENCY_LIMIT;
1135                         }                        546                         }
1136                 }                                547                 }
1137                                                  548 
1138                 offset = scan_base;              549                 offset = scan_base;
1139                 spin_lock(&si->lock);            550                 spin_lock(&si->lock);
1140                 si->cluster_nr = SWAPFILE_CLUSTER - 1;                    551                 si->cluster_nr = SWAPFILE_CLUSTER - 1;
1141         }                                        552         }
1142                                                  553 
1143 checks:                                          554 checks:
                                                   >> 555         if (si->cluster_info) {
                                                   >> 556                 while (scan_swap_map_ssd_cluster_conflict(si, offset))
                                                   >> 557                         scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
                                                   >> 558         }
1144         if (!(si->flags & SWP_WRITEOK))          559         if (!(si->flags & SWP_WRITEOK))
1145                 goto no_page;                    560                 goto no_page;
1146         if (!si->highest_bit)                    561         if (!si->highest_bit)
1147                 goto no_page;                    562                 goto no_page;
1148         if (offset > si->highest_bit)            563         if (offset > si->highest_bit)
1149                 scan_base = offset = si->lowest_bit;                      564                 scan_base = offset = si->lowest_bit;
1150                                                  565 
1151         /* reuse swap entry of cache-only swap if not busy. */            566         /* reuse swap entry of cache-only swap if not busy. */
1152         if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {   567         if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
1153                 int swap_was_freed;                                       568                 int swap_was_freed;
1154                 spin_unlock(&si->lock);                                   569                 spin_unlock(&si->lock);
1155                 swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY | TTRS_DIRECT); !! 570                 swap_was_freed = __try_to_reclaim_swap(si, offset);
1156                 spin_lock(&si->lock);            571                 spin_lock(&si->lock);
1157                 /* entry was freed successfully, try to use this again */ 572                 /* entry was freed successfully, try to use this again */
1158                 if (swap_was_freed > 0)       !! 573                 if (swap_was_freed)
1159                         goto checks;             574                         goto checks;
1160                 goto scan; /* check next one */                           575                 goto scan; /* check next one */
1161         }                                        576         }
1162                                                  577 
1163         if (si->swap_map[offset]) {           !! 578         if (si->swap_map[offset])
1164                 if (!n_ret)                   !! 579                 goto scan;
1165                         goto scan;            << 
1166                 else                          << 
1167                         goto done;            << 
1168         }                                     << 
1169         memset(si->swap_map + offset, usage, nr_pages);                <<
1170                                                                        <<
1171         swap_range_alloc(si, offset, nr_pages);                        <<
1172         slots[n_ret++] = swp_entry(si->type, offset);                  <<
1173                                               << 
1174         /* got enough slots or reach max slots? */                     <<
1175         if ((n_ret == nr) || (offset >= si->highest_bit))              <<
1176                 goto done;                    << 
1177                                               << 
1178         /* search for next available slot */  << 
1179                                               << 
1180         /* time to take a break? */           << 
1181         if (unlikely(--latency_ration < 0)) { << 
1182                 if (n_ret)                    << 
1183                         goto done;            << 
1184                 spin_unlock(&si->lock);       << 
1185                 cond_resched();               << 
1186                 spin_lock(&si->lock);         << 
1187                 latency_ration = LATENCY_LIMIT;                        <<
1188         }                                     << 
1189                                               << 
1190         if (si->cluster_nr && !si->swap_map[++offset]) {               <<
1191                 /* non-ssd case, still more slots in cluster? */       <<
1192                 --si->cluster_nr;             << 
1193                 goto checks;                  << 
1194         }                                     << 
1195                                               << 
1196         /*                                    << 
1197          * Even if there's no free clusters available (fragmented),    <<
1198          * try to scan a little more quickly with lock held unless we  <<
1199          * have scanned too many slots already.                        <<
1200          */                                   << 
1201         if (!scanned_many) {                  << 
1202                 unsigned long scan_limit;     << 
1203                                                  580 
1204                 if (offset < scan_base)       !! 581         if (offset == si->lowest_bit)
1205                         scan_limit = scan_base;                        !! 582                 si->lowest_bit++;
1206                 else                                                   !! 583         if (offset == si->highest_bit)
1207                         scan_limit = si->highest_bit;                  !! 584                 si->highest_bit--;
1208                 for (; offset <= scan_limit && --latency_ration > 0;   !! 585         si->inuse_pages++;
1209                      offset++) {                                       !! 586         if (si->inuse_pages == si->pages) {
1210                         if (!si->swap_map[offset])                     !! 587                 si->lowest_bit = si->max;
1211                                 goto checks;  !! 588                 si->highest_bit = 0;
1212                 }                             !! 589                 spin_lock(&swap_avail_lock);
                                                   >> 590                 plist_del(&si->avail_list, &swap_avail_head);
                                                   >> 591                 spin_unlock(&swap_avail_lock);
1213         }                                        592         }
1214                                               !! 593         si->swap_map[offset] = usage;
1215 done:                                         !! 594         inc_cluster_info_page(si, si->cluster_info, offset);
1216         if (order == 0)                       !! 595         si->cluster_next = offset + 1;
1217                 set_cluster_next(si, offset + 1);                      <<
1218         si->flags -= SWP_SCANNING;               596         si->flags -= SWP_SCANNING;
1219         return n_ret;                         !! 597 
                                                   >> 598         return offset;
1220                                                  599 
1221 scan:                                            600 scan:
1222         VM_WARN_ON(order > 0);                << 
1223         spin_unlock(&si->lock);                  601         spin_unlock(&si->lock);
1224         while (++offset <= READ_ONCE(si->highest_bit)) {               !! 602         while (++offset <= si->highest_bit) {
                                                   >> 603                 if (!si->swap_map[offset]) {
                                                   >> 604                         spin_lock(&si->lock);
                                                   >> 605                         goto checks;
                                                   >> 606                 }
                                                   >> 607                 if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
                                                   >> 608                         spin_lock(&si->lock);
                                                   >> 609                         goto checks;
                                                   >> 610                 }
1225                 if (unlikely(--latency_ration    611                 if (unlikely(--latency_ration < 0)) {
1226                         cond_resched();          612                         cond_resched();
1227                         latency_ration = LATENCY_LIMIT;                   613                         latency_ration = LATENCY_LIMIT;
1228                         scanned_many = true;  << 
1229                 }                                614                 }
1230                 if (swap_offset_available_and_locked(si, offset))      <<
1231                         goto checks;          << 
1232         }                                        615         }
1233         offset = si->lowest_bit;                 616         offset = si->lowest_bit;
1234         while (offset < scan_base) {             617         while (offset < scan_base) {
                                                   >> 618                 if (!si->swap_map[offset]) {
                                                   >> 619                         spin_lock(&si->lock);
                                                   >> 620                         goto checks;
                                                   >> 621                 }
                                                   >> 622                 if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
                                                   >> 623                         spin_lock(&si->lock);
                                                   >> 624                         goto checks;
                                                   >> 625                 }
1235                 if (unlikely(--latency_ration    626                 if (unlikely(--latency_ration < 0)) {
1236                         cond_resched();          627                         cond_resched();
1237                         latency_ration = LATENCY_LIMIT;                   628                         latency_ration = LATENCY_LIMIT;
1238                         scanned_many = true;  << 
1239                 }                                629                 }
1240                 if (swap_offset_available_and_locked(si, offset))      <<
1241                         goto checks;          << 
1242                 offset++;                        630                 offset++;
1243         }                                        631         }
1244         spin_lock(&si->lock);                    632         spin_lock(&si->lock);
1245                                                  633 
1246 no_page:                                         634 no_page:
1247         si->flags -= SWP_SCANNING;               635         si->flags -= SWP_SCANNING;
1248         return n_ret;                         !! 636         return 0;
1249 }                                                637 }
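/*
 * [Editor's illustrative sketch -- not part of mm/swapfile.c] The HDD path
 * above budgets its linear scan with latency_ration: every LATENCY_LIMIT
 * probes it calls cond_resched() so a long hunt for a free slot cannot
 * monopolize the CPU. The skeleton of that pattern; demo_* names are
 * hypothetical:
 */
#define DEMO_LATENCY_LIMIT 256

static void demo_resched(void)
{
        /* stands in for cond_resched() */
}

static long demo_scan(const unsigned char *map, unsigned long lo, unsigned long hi)
{
        int budget = DEMO_LATENCY_LIMIT;
        unsigned long off;

        for (off = lo; off <= hi; off++) {
                if (!map[off])
                        return (long)off;       /* found a free slot */
                if (--budget < 0) {             /* time to take a break? */
                        demo_resched();
                        budget = DEMO_LATENCY_LIMIT;
                }
        }
        return -1;                              /* nothing free in range */
}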
1250                                                  638 
1251 int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order) !! 639 swp_entry_t get_swap_page(void)
1252 {                                                                         640 {
1253         int order = swap_entry_order(entry_order);                     <<
1254         unsigned long size = 1 << order;      << 
1255         struct swap_info_struct *si, *next;      641         struct swap_info_struct *si, *next;
1256         long avail_pgs;                       !! 642         pgoff_t offset;
1257         int n_ret = 0;                        << 
1258         int node;                             << 
1259                                               << 
1260         spin_lock(&swap_avail_lock);          << 
1261                                                  643 
1262         avail_pgs = atomic_long_read(&nr_swap_pages) / size;           !! 644         if (atomic_long_read(&nr_swap_pages) <= 0)
1263         if (avail_pgs <= 0) {                                          <<
1264                 spin_unlock(&swap_avail_lock);                         <<
1265                 goto noswap;                                              645                 goto noswap;
1266         }                                                              !! 646         atomic_long_dec(&nr_swap_pages);
1267                                                                           647
1268         n_goal = min3((long)n_goal, (long)SWAP_BATCH, avail_pgs);      !! 648         spin_lock(&swap_avail_lock);
1269                                                                        <<
1270         atomic_long_sub(n_goal * size, &nr_swap_pages);                <<
1271                                                  649 
1272 start_over:                                      650 start_over:
1273         node = numa_node_id();                !! 651         plist_for_each_entry_safe(si, next, &swap_avail_head, avail_list) {
1274         plist_for_each_entry_safe(si, next, &swap_avail_heads[node], avail_lists[node]) { !! 651         plist_for_each_entry_safe(si, next, &swap_avail_head, avail_list) {
1275                 /* requeue si to after same-priority siblings */          652                 /* requeue si to after same-priority siblings */
1276                 plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); !! 653                 plist_requeue(&si->avail_list, &swap_avail_head);
1277                 spin_unlock(&swap_avail_lock)    654                 spin_unlock(&swap_avail_lock);
1278                 spin_lock(&si->lock);            655                 spin_lock(&si->lock);
1279                 if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {     656                 if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
1280                         spin_lock(&swap_avail_lock);                      657                         spin_lock(&swap_avail_lock);
1281                         if (plist_node_empty(&si->avail_lists[node])) { !! 658                         if (plist_node_empty(&si->avail_list)) {
1282                                 spin_unlock(&si->lock);                   659                                 spin_unlock(&si->lock);
1283                                 goto nextsi;                              660                                 goto nextsi;
1284                         }                                                 661                         }
1285                         WARN(!si->highest_bit,                            662                         WARN(!si->highest_bit,
1286                              "swap_info %d in list but !highest_bit\n",   663                              "swap_info %d in list but !highest_bit\n",
1287                              si->type);                                   664                              si->type);
1288                         WARN(!(si->flags & SWP_WRITEOK),                  665                         WARN(!(si->flags & SWP_WRITEOK),
1289                              "swap_info %d in list but !SWP_WRITEOK\n",   666                              "swap_info %d in list but !SWP_WRITEOK\n",
1290                              si->type);                                   667                              si->type);
1291                         __del_from_avail_list(si);                     !! 668                         plist_del(&si->avail_list, &swap_avail_head);
1292                         spin_unlock(&si->lock    669                         spin_unlock(&si->lock);
1293                         goto nextsi;             670                         goto nextsi;
1294                 }                                671                 }
1295                 n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,        <<
1296                                             n_goal, swp_entries, order); <<
1297                 spin_unlock(&si->lock);       << 
1298                 if (n_ret || size > 1)        << 
1299                         goto check_out;       << 
1300                 cond_resched();               << 
1301                                                  672 
                                                   >> 673                 /* This is called for allocating swap entry for cache */
                                                   >> 674                 offset = scan_swap_map(si, SWAP_HAS_CACHE);
                                                   >> 675                 spin_unlock(&si->lock);
                                                   >> 676                 if (offset)
                                                   >> 677                         return swp_entry(si->type, offset);
                                                   >> 678                 pr_debug("scan_swap_map of si %d failed to find offset\n",
                                                   >> 679                        si->type);
1302                 spin_lock(&swap_avail_lock);     680                 spin_lock(&swap_avail_lock);
1303 nextsi:                                          681 nextsi:
1304                 /*                               682                 /*
1305                  * if we got here, it's likely that si was almost full before, 683                  * if we got here, it's likely that si was almost full before,
1306                  * and since scan_swap_map_slots() can drop the si->lock, !! 684                  * and since scan_swap_map() can drop the si->lock, multiple
1307                  * multiple callers probably all tried to get a page from the !! 685                  * callers probably all tried to get a page from the same si
1308                  * same si and it filled up before we could get one; or, the si !! 686                  * and it filled up before we could get one; or, the si filled
1309                  * filled up between us dropping swap_avail_lock and taking !! 687                  * up between us dropping swap_avail_lock and taking si->lock.
1310                  * si->lock. Since we dropped the swap_avail_lock, the  !! 688                  * Since we dropped the swap_avail_lock, the swap_avail_head
1311                  * swap_avail_head list may have been modified; so if next is !! 689                  * list may have been modified; so if next is still in the
1312                  * still in the swap_avail_head list then try it, otherwise !! 690                  * swap_avail_head list then try it, otherwise start over.
1313                  * start over if we have not gotten any slots.         <<
1314                  */                                                       691                  */
1315                 if (plist_node_empty(&next->avail_lists[node]))        !! 692                 if (plist_node_empty(&next->avail_list))
1316                         goto start_over;         693                         goto start_over;
1317         }                                        694         }
1318                                                  695 
1319         spin_unlock(&swap_avail_lock);           696         spin_unlock(&swap_avail_lock);
1320                                                  697 
1321 check_out:                                    !! 698         atomic_long_inc(&nr_swap_pages);
1322         if (n_ret < n_goal)                   << 
1323                 atomic_long_add((long)(n_goal << 
1324                                 &nr_swap_page << 
1325 noswap:                                          699 noswap:
1326         return n_ret;                         !! 700         return (swp_entry_t) {0};
1327 }                                                701 }
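/*
 * [Editor's illustrative sketch -- not part of mm/swapfile.c]
 * get_swap_pages() calls plist_requeue() before trying a device, rotating
 * it behind its same-priority siblings; equal-priority swap devices thus
 * share the load round-robin while higher-priority ones are still tried
 * first. A toy rotation over a plain array makes the effect visible; all
 * names are hypothetical:
 */
#include <stdio.h>

#define DEMO_NDEV 3

int main(void)
{
        const char *dev[DEMO_NDEV] = { "swapA", "swapB", "swapC" };
        unsigned int head = 0, i;

        /* six allocations: each one "requeues" the device it just used */
        for (i = 0; i < 6; i++) {
                printf("alloc %u -> %s\n", i, dev[head]);
                head = (head + 1) % DEMO_NDEV;
        }
        return 0;
}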
1328                                                  702 
1329 static struct swap_info_struct *_swap_info_get(swp_entry_t entry)      !! 703 /* The only caller of this function is now suspend routine */
                                                   >> 704 swp_entry_t get_swap_page_of_type(int type)
1330 {                                                705 {
1331         struct swap_info_struct *si;             706         struct swap_info_struct *si;
1332         unsigned long offset;                 !! 707         pgoff_t offset;
                                                   >> 708 
                                                   >> 709         si = swap_info[type];
                                                   >> 710         spin_lock(&si->lock);
                                                   >> 711         if (si && (si->flags & SWP_WRITEOK)) {
                                                   >> 712                 atomic_long_dec(&nr_swap_pages);
                                                   >> 713                 /* This is called for allocating swap entry, not cache */
                                                   >> 714                 offset = scan_swap_map(si, 1);
                                                   >> 715                 if (offset) {
                                                   >> 716                         spin_unlock(&si->lock);
                                                   >> 717                         return swp_entry(type, offset);
                                                   >> 718                 }
                                                   >> 719                 atomic_long_inc(&nr_swap_pages);
                                                   >> 720         }
                                                   >> 721         spin_unlock(&si->lock);
                                                   >> 722         return (swp_entry_t) {0};
                                                   >> 723 }
                                                   >> 724 
                                                   >> 725 static struct swap_info_struct *swap_info_get(swp_entry_t entry)
                                                   >> 726 {
                                                   >> 727         struct swap_info_struct *p;
                                                   >> 728         unsigned long offset, type;
1333                                                  729 
1334         if (!entry.val)                          730         if (!entry.val)
1335                 goto out;                        731                 goto out;
1336         si = swp_swap_info(entry);            !! 732         type = swp_type(entry);
1337         if (!si)                              !! 733         if (type >= nr_swapfiles)
1338                 goto bad_nofile;                 734                 goto bad_nofile;
1339         if (data_race(!(si->flags & SWP_USED)))                        <<
                                                   >> 736         if (!(p->flags & SWP_USED))
1340                 goto bad_device;                 737                 goto bad_device;
1341         offset = swp_offset(entry);              738         offset = swp_offset(entry);
1342         if (offset >= si->max)                !! 739         if (offset >= p->max)
1343                 goto bad_offset;                 740                 goto bad_offset;
1344         if (data_race(!si->swap_map[swp_offset(entry)]))               <<
1345                 goto bad_free;                   742                 goto bad_free;
1346         return si;                            !! 743         spin_lock(&p->lock);
                                                   >> 744         return p;
1347                                                  745 
1348 bad_free:                                        746 bad_free:
1349         pr_err("%s: %s%08lx\n", __func__, Unused_offset, entry.val);   !! 747         pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val);
1350         goto out;                                                         748         goto out;
1351 bad_offset:                                                               749 bad_offset:
1352         pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);      !! 750         pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val);
1353         goto out;                                                         751         goto out;
1354 bad_device:                                                               752 bad_device:
1355         pr_err("%s: %s%08lx\n", __func__, Unused_file, entry.val);     !! 753         pr_err("swap_free: %s%08lx\n", Unused_file, entry.val);
1356         goto out;                                                         754         goto out;
1357 bad_nofile:                                                               755 bad_nofile:
1358         pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val);        !! 756         pr_err("swap_free: %s%08lx\n", Bad_file, entry.val);
1359 out:                                             757 out:
1360         return NULL;                             758         return NULL;
1361 }                                                759 }
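/*
 * [Editor's illustrative sketch -- not part of mm/swapfile.c]
 * _swap_info_get() unpacks a swp_entry_t into a device index (swp_type)
 * and a slot (swp_offset) and validates both before touching si->swap_map.
 * A toy encoding with a 6-bit type field; the real split is arch-dependent
 * and every demo_* name here is hypothetical:
 */
#define DEMO_TYPE_BITS 6
#define DEMO_OFFSET_BITS (sizeof(unsigned long) * 8 - DEMO_TYPE_BITS)

static unsigned long demo_mk_entry(unsigned int type, unsigned long offset)
{
        return ((unsigned long)type << DEMO_OFFSET_BITS) | offset;
}

static unsigned int demo_type(unsigned long entry)
{
        return entry >> DEMO_OFFSET_BITS;
}

static unsigned long demo_offset(unsigned long entry)
{
        return entry & ((1UL << DEMO_OFFSET_BITS) - 1);
}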
1362                                                  760 
1363 static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,  !! 761 static unsigned char swap_entry_free(struct swap_info_struct *p,
1364                                         struct swap_info_struct *q)    !! 762                                      swp_entry_t entry, unsigned char usage)
1365 {                                             << 
1366         struct swap_info_struct *p;           << 
1367                                               << 
1368         p = _swap_info_get(entry);            << 
1369                                               << 
1370         if (p != q) {                         << 
1371                 if (q != NULL)                << 
1372                         spin_unlock(&q->lock);                         <<
1373                 if (p != NULL)                << 
1374                         spin_lock(&p->lock);  << 
1375         }                                     << 
1376         return p;                             << 
1377 }                                             << 
1378                                               << 
1379 static unsigned char __swap_entry_free_locked(struct swap_info_struct *si,  << 
1380                                               unsigned long offset,  << 
1381                                               unsigned char usage)  << 
1382 {                                                763 {
                                                   >> 764         unsigned long offset = swp_offset(entry);
1383         unsigned char count;                     765         unsigned char count;
1384         unsigned char has_cache;                 766         unsigned char has_cache;
1385                                                  767 
1386         count = si->swap_map[offset];         !! 768         count = p->swap_map[offset];
1387                                               << 
1388         has_cache = count & SWAP_HAS_CACHE;      769         has_cache = count & SWAP_HAS_CACHE;
1389         count &= ~SWAP_HAS_CACHE;                770         count &= ~SWAP_HAS_CACHE;
1390                                                  771 
1391         if (usage == SWAP_HAS_CACHE) {           772         if (usage == SWAP_HAS_CACHE) {
1392                 VM_BUG_ON(!has_cache);           773                 VM_BUG_ON(!has_cache);
1393                 has_cache = 0;                   774                 has_cache = 0;
1394         } else if (count == SWAP_MAP_SHMEM) {    775         } else if (count == SWAP_MAP_SHMEM) {
1395                 /*                               776                 /*
1396                  * Or we could insist on shmem.c using a special    777                  * Or we could insist on shmem.c using a special
1397                  * swap_shmem_free() and free_shmem_swap_and_cache()...    778                  * swap_shmem_free() and free_shmem_swap_and_cache()...
1398                  */                              779                  */
1399                 count = 0;                       780                 count = 0;
1400         } else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {    781         } else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
1401                 if (count == COUNT_CONTINUED) {    782                 if (count == COUNT_CONTINUED) {
1402                         if (swap_count_continued(si, offset, count))  !!  783                         if (swap_count_continued(p, offset, count))
1403                                 count = SWAP_MAP_MAX | COUNT_CONTINUED;    784                                 count = SWAP_MAP_MAX | COUNT_CONTINUED;
1404                         else                     785                         else
1405                                 count = SWAP_MAP_MAX;    786                                 count = SWAP_MAP_MAX;
1406                 } else                           787                 } else
1407                         count--;                 788                         count--;
1408         }                                        789         }
1409                                                  790 
1410         usage = count | has_cache;            !! 791         if (!count)
1411         if (usage)                            !! 792                 mem_cgroup_uncharge_swap(entry);
1412                 WRITE_ONCE(si->swap_map[offset], usage);  << 
1413         else                                  << 
1414                 WRITE_ONCE(si->swap_map[offset], SWAP_HAS_CACHE);  << 
1415                                               << 
1416         return usage;                         << 
1417 }                                             << 
1418                                               << 
1419 /*                                            << 
1420  * When we get a swap entry, if there aren't some other ways to  << 
1421  * prevent swapoff, such as the folio in swap cache is locked, RCU  << 
1422  * reader side is locked, etc., the swap entry may become invalid  << 
1423  * because of swapoff.  Then, we need to enclose all swap related  << 
1424  * functions with get_swap_device() and put_swap_device(), unless the  << 
1425  * swap functions call get/put_swap_device() by themselves.  << 
1426  *                                            << 
1427  * RCU reader side lock (including any spinlock) is sufficient to  << 
1428  * prevent swapoff, because synchronize_rcu() is called in swapoff()  << 
1429  * before freeing data structures.            << 
1430  *                                            << 
1431  * Check whether swap entry is valid in the swap device.  If so,  << 
1432  * return pointer to swap_info_struct, and keep the swap entry valid  << 
1433  * via preventing the swap device from being swapoff, until  << 
1434  * put_swap_device() is called.  Otherwise return NULL.  << 
1435  *                                            << 
1436  * Notice that swapoff or swapoff+swapon can still happen before the  << 
1437  * percpu_ref_tryget_live() in get_swap_device() or after the  << 
1438  * percpu_ref_put() in put_swap_device() if there isn't any other way  << 
1439  * to prevent swapoff.  The caller must be prepared for that.  For  << 
1440  * example, the following situation is possible.  << 
1441  *                                            << 
1442  *   CPU1                               CPU2  << 
1443  *   do_swap_page()                           << 
1444  *     ...                              swapoff+swapon  << 
1445  *     __read_swap_cache_async()              << 
1446  *       swapcache_prepare()                  << 
1447  *         __swap_duplicate()                 << 
1448  *           // check swap_map                << 
1449  *     // verify PTE not changed              << 
1450  *                                            << 
1451  * In __swap_duplicate(), the swap_map need to be checked before  << 
1452  * changing partly because the specified swap entry may be for another  << 
1453  * swap device which has been swapoff.  And in do_swap_page(), after  << 
1454  * the page is read from the swap device, the PTE is verified not  << 
1455  * changed with the page table locked to check whether the swap device  << 
1456  * has been swapoff or swapoff+swapon.        << 
1457  */                                           << 
1458 struct swap_info_struct *get_swap_device(swp_entry_t entry)  << 
1459 {                                             << 
1460         struct swap_info_struct *si;          << 
1461         unsigned long offset;                 << 
1462                                                  793 
1463         if (!entry.val)                       !! 794         usage = count | has_cache;
1464                 goto out;                     !! 795         p->swap_map[offset] = usage;
1465         si = swp_swap_info(entry);            << 
1466         if (!si)                              << 
1467                 goto bad_nofile;              << 
1468         if (!percpu_ref_tryget_live(&si->users))  << 
1469                 goto out;                     << 
1470         /*                                    << 
1471          * Guarantee the si->users are checked before accessing other  << 
1472          * fields of swap_info_struct.        << 
1473          *                                    << 
1474          * Paired with the spin_unlock() after setup_swap_info() in  << 
1475          * enable_swap_info().                << 
1476          */                                   << 
1477         smp_rmb();                            << 
1478         offset = swp_offset(entry);           << 
1479         if (offset >= si->max)                << 
1480                 goto put_out;                 << 
1481                                               << 
1482         return si;                            << 
1483 bad_nofile:                                   << 
1484         pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val);  << 
1485 out:                                          << 
1486         return NULL;                          << 
1487 put_out:                                      << 
1488         pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);  << 
1489         percpu_ref_put(&si->users);           << 
1490         return NULL;                          << 
1491 }                                             << 
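/*
 * [Editor's sketch -- hypothetical caller, not part of swapfile.c.]  The
 * long comment above boils down to this calling pattern; a racing
 * swapoff+swapon is still possible, so the caller re-checks its own state
 * (for example, a PTE) afterwards:
 *
 *	struct swap_info_struct *si = get_swap_device(entry);
 *
 *	if (!si)
 *		return;				// device already gone
 *	// ... safely dereference si->swap_map[swp_offset(entry)] ...
 *	put_swap_device(si);			// percpu_ref_put(&si->users)
 */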
1492                                               << 
1493 static unsigned char __swap_entry_free(struct swap_info_struct *si,  << 
1494                                        swp_entry_t entry)  << 
1495 {                                             << 
1496         struct swap_cluster_info *ci;         << 
1497         unsigned long offset = swp_offset(entry);  << 
1498         unsigned char usage;                  << 
1499                                               << 
1500         ci = lock_cluster_or_swap_info(si, offset);  << 
1501         usage = __swap_entry_free_locked(si, offset, 1);  << 
1502         unlock_cluster_or_swap_info(si, ci);  << 
1503         if (!usage)                           << 
1504                 free_swap_slot(entry);        << 
1505                                               << 
1506         return usage;                         << 
1507 }                                             << 
1508                                               << 
1509 static bool __swap_entries_free(struct swap_info_struct *si,  << 
1510                 swp_entry_t entry, int nr)    << 
1511 {                                             << 
1512         unsigned long offset = swp_offset(entry);  << 
1513         unsigned int type = swp_type(entry);  << 
1514         struct swap_cluster_info *ci;         << 
1515         bool has_cache = false;               << 
1516         unsigned char count;                  << 
1517         int i;                                << 
1518                                               << 
1519         if (nr <= 1 || swap_count(data_race(si->swap_map[offset])) != 1)  << 
1520                 goto fallback;                << 
1521         /* cross into another cluster */      << 
1522         if (nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER)  << 
1523                 goto fallback;                << 
1524                                               << 
1525         ci = lock_cluster_or_swap_info(si, offset);  << 
1526         if (!swap_is_last_map(si, offset, nr, &has_cache)) {  << 
1527                 unlock_cluster_or_swap_info(si, ci);  << 
1528                 goto fallback;                << 
1529         }                                     << 
1530         for (i = 0; i < nr; i++)              << 
1531                 WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);  << 
1532         unlock_cluster_or_swap_info(si, ci);  << 
1533                                               << 
1534         if (!has_cache) {                     << 
1535                 for (i = 0; i < nr; i++)      << 
1536                         zswap_invalidate(swp_entry(type, offset + i));  << 
1537                 spin_lock(&si->lock);         << 
1538                 swap_entry_range_free(si, entry, nr);  << 
1539                 spin_unlock(&si->lock);       << 
1540         }                                     << 
1541         return has_cache;                     << 
1542                                                  796 
1543 fallback:                                     !! 797         /* free if no reference */
1544         for (i = 0; i < nr; i++) {            !! 798         if (!usage) {
1545                 if (data_race(si->swap_map[offset + i])) {  !!  799                         dec_cluster_info_page(p, p->cluster_info, offset);
1546                         count = __swap_entry_free(si, swp_entry(type, offset + i));  !!  800                 if (offset < p->lowest_bit)
1547                         if (count == SWAP_HAS_CACHE)  !!  801                         p->lowest_bit = offset;
1548                                 has_cache = true;  !!  802                 if (offset > p->highest_bit) {
1549                 } else {                      !! 803                         bool was_full = !p->highest_bit;
1550                         WARN_ON_ONCE(1);      !! 804                         p->highest_bit = offset;
                                                   >> 805                         if (was_full && (p->flags & SWP_WRITEOK)) {
                                                   >> 806                                 spin_lock(&swap_avail_lock);
                                                   >> 807                                 WARN_ON(!plist_node_empty(&p->avail_list));
                                                   >> 808                                 if (plist_node_empty(&p->avail_list))
                                                   >> 809                                         plist_add(&p->avail_list,
                                                   >> 810                                                   &swap_avail_head);
                                                   >> 811                                 spin_unlock(&swap_avail_lock);
                                                   >> 812                         }
1551                 }                                813                 }
1552         }                                     !! 814                 atomic_long_inc(&nr_swap_pages);
1553         return has_cache;                     !! 815                 p->inuse_pages--;
1554 }                                             !! 816                 frontswap_invalidate_page(p->type, offset);
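/*
 * [Editor's note -- illustration only.]  __swap_entries_free() is the
 * batched exit path for a large folio: when the caller holds the only
 * reference to every slot and the range stays inside one cluster, a single
 * locked pass rewrites the whole map range; otherwise the fallback loop
 * drops one reference per entry via __swap_entry_free().  The return value
 * tells the caller whether any slots are still pinned by the swap cache
 * and therefore still need a reclaim pass.
 */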
1555                                               !! 817                 if (p->flags & SWP_BLKDEV) {
1556 /*                                            !! 818                         struct gendisk *disk = p->bdev->bd_disk;
1557  * Drop the last HAS_CACHE flag of swap entries; the caller has to  !!  819                         if (disk->fops->swap_slot_free_notify)
1558  * ensure all entries belong to the same cgroup.  !!  820                                 disk->fops->swap_slot_free_notify(p->bdev,
1559  */                                           !!  821                                                                   offset);
1560 static void swap_entry_range_free(struct swap_info_struct *si, swp_entry_t entry,  << 
1561                                   unsigned int nr_pages)  << 
1562 {                                             << 
1563         unsigned long offset = swp_offset(entry);  << 
1564         unsigned char *map = si->swap_map + offset;  << 
1565         unsigned char *map_end = map + nr_pages;  << 
1566         struct swap_cluster_info *ci;         << 
1567                                               << 
1568         ci = lock_cluster(si, offset);        << 
1569         do {                                  << 
1570                 VM_BUG_ON(*map != SWAP_HAS_CACHE);  << 
1571                 *map = 0;                     << 
1572         } while (++map < map_end);            << 
1573         dec_cluster_info_page(si, ci, nr_pages);  << 
1574         unlock_cluster(ci);                   << 
1575                                               << 
1576         mem_cgroup_uncharge_swap(entry, nr_pages);  << 
1577         swap_range_free(si, offset, nr_pages);  << 
1578 }                                             << 
1579                                               << 
1580 static void cluster_swap_free_nr(struct swap_info_struct *si,  << 
1581                 unsigned long offset, int nr_pages,  << 
1582                 unsigned char usage)          << 
1583 {                                             << 
1584         struct swap_cluster_info *ci;         << 
1585         DECLARE_BITMAP(to_free, BITS_PER_LONG) = { 0 };  << 
1586         int i, nr;                            << 
1587                                               << 
1588         ci = lock_cluster_or_swap_info(si, offset);  << 
1589         while (nr_pages) {                    << 
1590                 nr = min(BITS_PER_LONG, nr_pages);  << 
1591                 for (i = 0; i < nr; i++) {    << 
1592                         if (!__swap_entry_free_locked(si, offset + i, usage))  << 
1593                                 bitmap_set(to_free, i, 1);  << 
1594                 }                             << 
1595                 if (!bitmap_empty(to_free, BITS_PER_LONG)) {  << 
1596                         unlock_cluster_or_swap_info(si, ci);  << 
1597                         for_each_set_bit(i, to_free, BITS_PER_LONG)  << 
1598                                 free_swap_slot(swp_entry(si->type, offset + i));  << 
1599                         if (nr == nr_pages)   << 
1600                                 return;       << 
1601                         bitmap_clear(to_free, 0, BITS_PER_LONG);  << 
1602                         ci = lock_cluster_or_swap_info(si, offset + nr);  << 
1603                 }                                822                 }
1604                 offset += nr;                 << 
1605                 nr_pages -= nr;               << 
1606         }                                        823         }
1607         unlock_cluster_or_swap_info(si, ci);  !! 824 
                                                   >> 825         return usage;
1608 }                                                826 }
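/*
 * [Editor's sketch -- generic illustration of the <linux/bitmap.h> pattern
 * used above; the predicate and callback names are hypothetical.]  Work
 * discovered under a lock is recorded in a small bitmap and processed only
 * after the lock is dropped, which is why free_swap_slot() runs outside
 * the cluster lock:
 *
 *	DECLARE_BITMAP(pending, BITS_PER_LONG) = { 0 };
 *	int i;
 *
 *	spin_lock(&lock);
 *	for (i = 0; i < n; i++)
 *		if (slot_became_free(i))	// hypothetical predicate
 *			bitmap_set(pending, i, 1);
 *	spin_unlock(&lock);
 *	for_each_set_bit(i, pending, BITS_PER_LONG)
 *		release_slot(i);		// hypothetical callback
 */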
1609                                                  827 
1610 /*                                               828 /*
1611  * Caller has made sure that the swap device corresponding to entry    829  * Caller has made sure that the swap device corresponding to entry
1612  * is still around or has not been recycled.     830  * is still around or has not been recycled.
1613  */                                              831  */
1614 void swap_free_nr(swp_entry_t entry, int nr_pages)  !!  832 void swap_free(swp_entry_t entry)
1615 {                                                833 {
1616         int nr;                               !! 834         struct swap_info_struct *p;
1617         struct swap_info_struct *sis;         << 
1618         unsigned long offset = swp_offset(entry);  << 
1619                                               << 
1620         sis = _swap_info_get(entry);          << 
1621         if (!sis)                             << 
1622                 return;                       << 
1623                                                  835 
1624         while (nr_pages) {                    !! 836         p = swap_info_get(entry);
1625                 nr = min_t(int, nr_pages, SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);  !!  837         if (p) {
1626                 cluster_swap_free_nr(sis, offset, nr, 1);  !!  838                 swap_entry_free(p, entry, 1);
1627                 offset += nr;                 !! 839                 spin_unlock(&p->lock);
1628                 nr_pages -= nr;               << 
1629         }                                        840         }
1630 }                                                841 }
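/*
 * [Editor's sketch -- hypothetical caller, not part of swapfile.c.]  A
 * typical user drops the references for a whole large folio worth of
 * contiguous swap PTEs in one call:
 *
 *	swp_entry_t entry = pte_to_swp_entry(orig_pte);
 *
 *	swap_free_nr(entry, nr);	// one reference per slot in [0, nr)
 *
 * The chunking above merely keeps each cluster_swap_free_nr() walk inside
 * a single SWAPFILE_CLUSTER-sized cluster.
 */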
1631                                                  842 
1632 /*                                               843 /*
1633  * Called after dropping swapcache to decreas    844  * Called after dropping swapcache to decrease refcnt to swap entries.
1634  */                                              845  */
1635 void put_swap_folio(struct folio *folio, swp_entry_t entry)  !!  846 void swapcache_free(swp_entry_t entry)
1636 {                                                847 {
1637         unsigned long offset = swp_offset(entry);  !!  848         struct swap_info_struct *p;
1638         struct swap_cluster_info *ci;         << 
1639         struct swap_info_struct *si;          << 
1640         int size = 1 << swap_entry_order(folio_order(folio));  << 
1641                                               << 
1642         si = _swap_info_get(entry);           << 
1643         if (!si)                              << 
1644                 return;                       << 
1645                                               << 
1646         ci = lock_cluster_or_swap_info(si, offset);  << 
1647         if (size > 1 && swap_is_has_cache(si, offset, size)) {  << 
1648                 unlock_cluster_or_swap_info(si, ci);  << 
1649                 spin_lock(&si->lock);         << 
1650                 swap_entry_range_free(si, entry, size);  << 
1651                 spin_unlock(&si->lock);       << 
1652                 return;                       << 
1653         }                                     << 
1654         for (int i = 0; i < size; i++, entry.val++) {  << 
1655                 if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) {  << 
1656                         unlock_cluster_or_swap_info(si, ci);  << 
1657                         free_swap_slot(entry);  << 
1658                         if (i == size - 1)    << 
1659                                 return;       << 
1660                         lock_cluster_or_swap_info(si, offset);  << 
1661                 }                             << 
1662         }                                     << 
1663         unlock_cluster_or_swap_info(si, ci);  << 
1664 }                                             << 
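/*
 * [Editor's note -- illustration only.]  For an order-N folio, "size" is
 * 1 << N contiguous slots.  The fast path above frees the whole aligned
 * range at once when every slot is cache-only; otherwise each slot drops
 * its SWAP_HAS_CACHE bit individually, exactly as for an order-0 folio.
 */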
1665                                               << 
1666 static int swp_entry_cmp(const void *ent1, const void *ent2)  << 
1667 {                                             << 
1668         const swp_entry_t *e1 = ent1, *e2 = ent2;  << 
1669                                               << 
1670         return (int)swp_type(*e1) - (int)swp_type(*e2);  << 
1671 }                                             << 
1672                                               << 
1673 void swapcache_free_entries(swp_entry_t *entries, int n)  << 
1674 {                                             << 
1675         struct swap_info_struct *p, *prev;    << 
1676         int i;                                << 
1677                                               << 
1678         if (n <= 0)                           << 
1679                 return;                       << 
1680                                               << 
1681         prev = NULL;                          << 
1682         p = NULL;                             << 
1683                                                  849 
1684         /*                                    !! 850         p = swap_info_get(entry);
1685          * Sort swap entries by swap device, so each lock is only taken once.  !!  851         if (p) {
1686          * nr_swapfiles isn't absolutely correct, but the overhead of sort() is  !!  852                 swap_entry_free(p, entry, SWAP_HAS_CACHE);
1687          * so low that it isn't necessary to optimize further.  << 
1688          */                                   << 
1689         if (nr_swapfiles > 1)                 << 
1690                 sort(entries, n, sizeof(entries[0]), swp_entry_cmp, NULL);  << 
1691         for (i = 0; i < n; ++i) {             << 
1692                 p = swap_info_get_cont(entries[i], prev);  << 
1693                 if (p)                        << 
1694                         swap_entry_range_free(p, entries[i], 1);  << 
1695                 prev = p;                     << 
1696         }                                     << 
1697         if (p)                                << 
1698                 spin_unlock(&p->lock);           853                 spin_unlock(&p->lock);
1699 }                                             !! 854         }
1700                                               << 
1701 int __swap_count(swp_entry_t entry)           << 
1702 {                                             << 
1703         struct swap_info_struct *si = swp_swap_info(entry);  << 
1704         pgoff_t offset = swp_offset(entry);   << 
1705                                               << 
1706         return swap_count(si->swap_map[offset]);  << 
1707 }                                                855 }
1708                                                  856 
1709 /*                                               857 /*
1710  * How many references to @entry are currently swapped out?  !!  858  * How many references to page are currently swapped out?
1711  * This does not give an exact answer when swap count is continued,    859  * This does not give an exact answer when swap count is continued,
1712  * but does include the high COUNT_CONTINUED flag to allow for that.    860  * but does include the high COUNT_CONTINUED flag to allow for that.
1713  */                                              861  */
1714 int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)  !!  862 int page_swapcount(struct page *page)
1715 {                                                863 {
1716         pgoff_t offset = swp_offset(entry);   !! 864         int count = 0;
1717         struct swap_cluster_info *ci;         !! 865         struct swap_info_struct *p;
1718         int count;                            !! 866         swp_entry_t entry;
1719                                                  867 
1720         ci = lock_cluster_or_swap_info(si, offset);  !!  868         entry.val = page_private(page);
1721         count = swap_count(si->swap_map[offset]);  !!  869         p = swap_info_get(entry);
1722         unlock_cluster_or_swap_info(si, ci);  !! 870         if (p) {
                                                   >> 871                 count = swap_count(p->swap_map[swp_offset(entry)]);
                                                   >> 872                 spin_unlock(&p->lock);
                                                   >> 873         }
1723         return count;                            874         return count;
1724 }                                                875 }
1725                                                  876 
1726 /*                                               877 /*
1727  * How many references to @entry are currently swapped out?    878  * How many references to @entry are currently swapped out?
1728  * This considers COUNT_CONTINUED so it returns exact answer.    879  * This considers COUNT_CONTINUED so it returns exact answer.
1729  */                                              880  */
1730 int swp_swapcount(swp_entry_t entry)             881 int swp_swapcount(swp_entry_t entry)
1731 {                                                882 {
1732         int count, tmp_count, n;                 883         int count, tmp_count, n;
1733         struct swap_info_struct *si;          !! 884         struct swap_info_struct *p;
1734         struct swap_cluster_info *ci;         << 
1735         struct page *page;                       885         struct page *page;
1736         pgoff_t offset;                          886         pgoff_t offset;
1737         unsigned char *map;                      887         unsigned char *map;
1738                                                  888 
1739         si = _swap_info_get(entry);           !! 889         p = swap_info_get(entry);
1740         if (!si)                              !! 890         if (!p)
1741                 return 0;                        891                 return 0;
1742                                                  892 
1743         offset = swp_offset(entry);           !! 893         count = swap_count(p->swap_map[swp_offset(entry)]);
1744                                               << 
1745         ci = lock_cluster_or_swap_info(si, offset);  << 
1746                                               << 
1747         count = swap_count(si->swap_map[offset]);  !!  893         count = swap_count(p->swap_map[swp_offset(entry)]);
1748         if (!(count & COUNT_CONTINUED))          894         if (!(count & COUNT_CONTINUED))
1749                 goto out;                        895                 goto out;
1750                                                  896 
1751         count &= ~COUNT_CONTINUED;               897         count &= ~COUNT_CONTINUED;
1752         n = SWAP_MAP_MAX + 1;                    898         n = SWAP_MAP_MAX + 1;
1753                                                  899 
1754         page = vmalloc_to_page(si->swap_map + offset);  !!  900         offset = swp_offset(entry);
                                                   >> 901         page = vmalloc_to_page(p->swap_map + offset);
1755         offset &= ~PAGE_MASK;                    902         offset &= ~PAGE_MASK;
1756         VM_BUG_ON(page_private(page) != SWP_CONTINUED);    903         VM_BUG_ON(page_private(page) != SWP_CONTINUED);
1757                                                  904 
1758         do {                                     905         do {
1759                 page = list_next_entry(page, lru);  !!  906                 page = list_entry(page->lru.next, struct page, lru);
1760                 map = kmap_local_page(page);  !! 907                 map = kmap_atomic(page);
1761                 tmp_count = map[offset];         908                 tmp_count = map[offset];
1762                 kunmap_local(map);            !! 909                 kunmap_atomic(map);
1763                                                  910 
1764                 count += (tmp_count & ~COUNT_CONTINUED) * n;    911                 count += (tmp_count & ~COUNT_CONTINUED) * n;
1765                 n *= (SWAP_CONT_MAX + 1);        912                 n *= (SWAP_CONT_MAX + 1);
1766         } while (tmp_count & COUNT_CONTINUED);    913         } while (tmp_count & COUNT_CONTINUED);
1767 out:                                             914 out:
1768         unlock_cluster_or_swap_info(si, ci);  !! 915         spin_unlock(&p->lock);
1769         return count;                            916         return count;
1770 }                                                917 }
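/*
 * [Editor's worked example, using the swap_map encoding from
 * include/linux/swap.h: SWAP_MAP_MAX == 0x3e, SWAP_CONT_MAX == 0x7f.]
 * Suppose the base map byte is SWAP_MAP_MAX | COUNT_CONTINUED and the
 * first (and only) continuation page holds 2 at this offset:
 *
 *	count = 0x3e             = 62	base, COUNT_CONTINUED stripped
 *	n     = SWAP_MAP_MAX + 1 = 63
 *	count += 2 * 63          = 188	exact reference count
 *
 * Each further continuation page would contribute in units of
 * n * (SWAP_CONT_MAX + 1).
 */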
1771                                                  918 
1772 static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,  !!  919 /*
1773                                          swp_entry_t entry, int order)  !!  920  * We can write to an anon page without COW if there are no other references
                                                   >> 921  * to it.  And as a side-effect, free up its swap: because the old content
                                                   >> 922  * on disk will never be read, and seeking back there to write new content
                                                   >> 923  * later would only waste time away from clustering.
                                                   >> 924  */
                                                   >> 925 int reuse_swap_page(struct page *page)
1774 {                                                926 {
1775         struct swap_cluster_info *ci;         !! 927         int count;
1776         unsigned char *map = si->swap_map;    << 
1777         unsigned int nr_pages = 1 << order;   << 
1778         unsigned long roffset = swp_offset(entry);  << 
1779         unsigned long offset = round_down(roffset, nr_pages);  << 
1780         int i;                                << 
1781         bool ret = false;                     << 
1782                                                  928 
1783         ci = lock_cluster_or_swap_info(si, offset);  !!  929         VM_BUG_ON_PAGE(!PageLocked(page), page);
1784         if (!ci || nr_pages == 1) {           !! 930         if (unlikely(PageKsm(page)))
1785                 if (swap_count(map[roffset])) !! 931                 return 0;
1786                         ret = true;           !! 932         count = page_mapcount(page);
1787                 goto unlock_out;              !! 933         if (count <= 1 && PageSwapCache(page)) {
1788         }                                     !! 934                 count += page_swapcount(page);
1789         for (i = 0; i < nr_pages; i++) {      !! 935                 if (count == 1 && !PageWriteback(page)) {
1790                 if (swap_count(map[offset + i])) {  !!  934                 count += page_swapcount(page);
1791                         ret = true;           !! 937                         SetPageDirty(page);
1792                         break;                << 
1793                 }                                938                 }
1794         }                                        939         }
1795 unlock_out:                                   !! 940         return count <= 1;
1796         unlock_cluster_or_swap_info(si, ci);  << 
1797         return ret;                           << 
1798 }                                             << 
1799                                               << 
1800 static bool folio_swapped(struct folio *folio)  << 
1801 {                                             << 
1802         swp_entry_t entry = folio->swap;      << 
1803         struct swap_info_struct *si = _swap_info_get(entry);  << 
1804                                               << 
1805         if (!si)                              << 
1806                 return false;                 << 
1807                                               << 
1808         if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!folio_test_large(folio)))  << 
1809                 return swap_swapcount(si, entry);  << 
1810                                               << 
1811         return swap_page_trans_huge_swapped(si, entry, folio_order(folio));  << 
1812 }                                                941 }
1813                                                  942 
1814 static bool folio_swapcache_freeable(struct folio *folio)  !!  943 /*
                                                   >> 944  * If swap is getting full, or if there are no more mappings of this page,
                                                   >> 945  * then try_to_free_swap is called to free its swap space.
                                                   >> 946  */
                                                   >> 947 int try_to_free_swap(struct page *page)
1815 {                                                948 {
1816         VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);  !!  949         VM_BUG_ON_PAGE(!PageLocked(page), page);
1817                                                  950 
1818         if (!folio_test_swapcache(folio))     !! 951         if (!PageSwapCache(page))
1819                 return false;                 !! 952                 return 0;
1820         if (folio_test_writeback(folio))      !! 953         if (PageWriteback(page))
1821                 return false;                 !! 954                 return 0;
                                                   >> 955         if (page_swapcount(page))
                                                   >> 956                 return 0;
1822                                                  957 
1823         /*                                       958         /*
1824          * Once hibernation has begun to create its image of memory,    959          * Once hibernation has begun to create its image of memory,
1825          * there's a danger that one of the calls to folio_free_swap()  !!  960          * there's a danger that one of the calls to try_to_free_swap()
1826          * - most probably a call from __try_to_reclaim_swap() while    961          * - most probably a call from __try_to_reclaim_swap() while
1827          * hibernation is allocating its own swap pages for the image,    962          * hibernation is allocating its own swap pages for the image,
1828          * but conceivably even a call from memory reclaim - will free    963          * but conceivably even a call from memory reclaim - will free
1829          * the swap from a folio which has already been recorded in the  !!  964          * the swap from a page which has already been recorded in the
1830          * image as a clean swapcache folio, and then reuse its swap for  !!  965          * image as a clean swapcache page, and then reuse its swap for
1831          * another page of the image.  On waking from hibernation, the    966          * another page of the image.  On waking from hibernation, the
1832          * original folio might be freed under memory pressure, then  !!  967          * original page might be freed under memory pressure, then
1833          * later read back in from swap, now with the wrong data.    968          * later read back in from swap, now with the wrong data.
1834          *                                       969          *
1835          * Hibernation suspends storage while    970          * Hibernation suspends storage while it is writing the image
1836          * to disk so check that here.           971          * to disk so check that here.
1837          */                                      972          */
1838         if (pm_suspended_storage())              973         if (pm_suspended_storage())
1839                 return false;                 !! 974                 return 0;
1840                                               << 
1841         return true;                          << 
1842 }                                             << 
1843                                               << 
1844 /**                                           << 
1845  * folio_free_swap() - Free the swap space used for this folio.  << 
1846  * @folio: The folio to remove.               << 
1847  *                                            << 
1848  * If swap is getting full, or if there are no more mappings of this folio,  << 
1849  * then call folio_free_swap to free its swap space.  << 
1850  *                                            << 
1851  * Return: true if we were able to release the swap space.  << 
1852  */                                           << 
1853 bool folio_free_swap(struct folio *folio)     << 
1854 {                                             << 
1855         if (!folio_swapcache_freeable(folio)) << 
1856                 return false;                 << 
1857         if (folio_swapped(folio))             << 
1858                 return false;                 << 
1859                                                  975 
1860         delete_from_swap_cache(folio);        !! 976         delete_from_swap_cache(page);
1861         folio_set_dirty(folio);               !! 977         SetPageDirty(page);
1862         return true;                          !! 978         return 1;
1863 }                                                979 }
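/*
 * [Editor's sketch -- hypothetical reclaim-side caller.]
 *
 *	folio_lock(folio);
 *	if (folio_free_swap(folio))
 *		;	// slots released; folio left dirty so its contents
 *			// are written out again if it is re-swapped
 *	folio_unlock(folio);
 *
 * The folio lock is mandatory, per the VM_BUG_ON_FOLIO() in
 * folio_swapcache_freeable().
 */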
1864                                                  980 
1865 /**                                           !! 981 /*
1866  * free_swap_and_cache_nr() - Release reference on range of swap entries and  !!  982  * Free the swap entry like above, but also try to
1867  *                            reclaim their cache if no more references remain.  !!  983  * free the page cache entry if it is the last user.
1868  * @entry: First entry of range.              << 
1869  * @nr: Number of entries in range.           << 
1870  *                                            << 
1871  * For each swap entry in the contiguous range, release a reference. If any swap  << 
1872  * entries become free, try to reclaim their underlying folios, if present. The  << 
1873  * offset range is defined by [entry.offset, entry.offset + nr).  << 
1874  */                                              984  */
1875 void free_swap_and_cache_nr(swp_entry_t entry, int nr)  !!  985 int free_swap_and_cache(swp_entry_t entry)
1876 {                                                986 {
1877         const unsigned long start_offset = swp_offset(entry);  !!  987         struct swap_info_struct *p;
1878         const unsigned long end_offset = start_offset + nr;  !!  988         struct page *page = NULL;
1879         struct swap_info_struct *si;          << 
1880         bool any_only_cache = false;          << 
1881         unsigned long offset;                 << 
1882                                                  989 
1883         if (non_swap_entry(entry))               990         if (non_swap_entry(entry))
1884                 return;                       !! 991                 return 1;
1885                                                  992 
1886         si = get_swap_device(entry);          !! 993         p = swap_info_get(entry);
1887         if (!si)                              !! 994         if (p) {
1888                 return;                       !! 995                 if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
1889                                               !! 996                         page = find_get_page(swap_address_space(entry),
1890         if (WARN_ON(end_offset > si->max))    !! 997                                                 entry.val);
1891                 goto out;                     !! 998                         if (page && !trylock_page(page)) {
1892                                               !! 999                                 page_cache_release(page);
1893         /*                                    !! 1000                                 page = NULL;
1894          * First free all entries in the range.  << 
1895          */                                   << 
1896         any_only_cache = __swap_entries_free( << 
1897                                               << 
1898         /*                                    << 
1899          * Short-circuit the below loop if none of the entries had their  << 
1900          * reference drop to zero.            << 
1901          */                                   << 
1902         if (!any_only_cache)                  << 
1903                 goto out;                     << 
1904                                               << 
1905         /*                                    << 
1906          * Now go back over the range trying to reclaim the swap cache. This is  << 
1907          * more efficient for large folios because we will only try to reclaim  << 
1908          * the swap once per folio in the common case. If we do  << 
1909          * __swap_entry_free() and __try_to_reclaim_swap() in the same loop, the  << 
1910          * latter will get a reference and lock the folio for every individual  << 
1911          * page but will only succeed once the swap slot count reaches  << 
1912          * zero.                              << 
1913          */                                   << 
1914         for (offset = start_offset; offset < end_offset; offset += nr) {  << 
1915                 nr = 1;                       << 
1916                 if (READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {  << 
1917                         /*                    << 
1918                          * Folios are always naturally aligned in swap so  << 
1919                          * advance forward to the next boundary. Zero means no  << 
1920                          * folio was found for the swap entry, so advance by 1  << 
1921                          * in this case. Negative value means folio was found  << 
1922                          * but could not be reclaimed. Here we can still advance  << 
1923                          * to the next boundary.  << 
1924                          */                   << 
1925                         nr = __try_to_reclaim_swap(si, offset,  << 
1926                                                    TTRS_UNMAPPED | TTRS_FULL);  << 
1927                         if (nr == 0)          << 
1928                                 nr = 1;       << 
1929                         else if (nr < 0)      << 
1930                                 nr = -nr;     << 
1931                         nr = ALIGN(offset + 1, nr) - offset;  << 
1932                 }                                1002                 }
                                                   >> 1003                 spin_unlock(&p->lock);
1933         }                                        1004         }
1934                                               !! 1005         if (page) {
1935 out:                                          !! 1006                 /*
1936         put_swap_device(si);                  !! 1007                  * Not mapped elsewhere, or swap space full? Free it!
                                                   >> 1008                  * Also recheck PageSwapCache now page is locked (above).
                                                   >> 1009                  */
                                                   >> 1010                 if (PageSwapCache(page) && !PageWriteback(page) &&
                                                   >> 1011                                 (!page_mapped(page) || vm_swap_full())) {
                                                   >> 1012                         delete_from_swap_cache(page);
                                                   >> 1013                         SetPageDirty(page);
                                                   >> 1014                 }
                                                   >> 1015                 unlock_page(page);
                                                   >> 1016                 page_cache_release(page);
                                                   >> 1017         }
                                                   >> 1018         return p != NULL;
1937 }                                                1019 }
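/*
 * [Editor's sketch -- hypothetical unmap-side caller.]  When zapping a
 * run of nr contiguous swap PTEs:
 *
 *	swp_entry_t entry = pte_to_swp_entry(pte);
 *
 *	free_swap_and_cache_nr(entry, nr);
 *
 * One reference is dropped per slot in [offset, offset + nr), and any slot
 * whose count reaches zero has its swap cache reclaimed eagerly rather
 * than waiting for memory pressure.
 */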
1938                                                  1020 
1939 #ifdef CONFIG_HIBERNATION                        1021 #ifdef CONFIG_HIBERNATION
1940                                               << 
1941 swp_entry_t get_swap_page_of_type(int type)   << 
1942 {                                             << 
1943         struct swap_info_struct *si = swap_type_to_swap_info(type);  << 
1944         swp_entry_t entry = {0};              << 
1945                                               << 
1946         if (!si)                              << 
1947                 goto fail;                    << 
1948                                               << 
1949         /* This is called for allocating swap entry, not cache */  << 
1950         spin_lock(&si->lock);                 << 
1951         if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry, 0))  << 
1952                 atomic_long_dec(&nr_swap_pages);  << 
1953         spin_unlock(&si->lock);               << 
1954 fail:                                         << 
1955         return entry;                         << 
1956 }                                             << 
1957                                               << 
1958 /*                                               1022 /*
1959  * Find the swap type that corresponds to given device (if any).    1023  * Find the swap type that corresponds to given device (if any).
1960  *                                               1024  *
1961  * @offset - number of the PAGE_SIZE-sized block of the device, starting    1025  * @offset - number of the PAGE_SIZE-sized block of the device, starting
1962  * from 0, in which the swap header is expected to be located.    1026  * from 0, in which the swap header is expected to be located.
1963  *                                               1027  *
1964  * This is needed for the suspend to disk (aka swsusp).    1028  * This is needed for the suspend to disk (aka swsusp).
1965  */                                              1029  */
1966 int swap_type_of(dev_t device, sector_t offset)  !!  1030 int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
1967 {                                                1031 {
                                                   >> 1032         struct block_device *bdev = NULL;
1968         int type;                                1033         int type;
1969                                                  1034 
1970         if (!device)                          !! 1035         if (device)
1971                 return -1;                    !! 1036                 bdev = bdget(device);
1972                                                  1037 
1973         spin_lock(&swap_lock);                   1038         spin_lock(&swap_lock);
1974         for (type = 0; type < nr_swapfiles; t    1039         for (type = 0; type < nr_swapfiles; type++) {
1975                 struct swap_info_struct *sis = swap_info[type];    1040                 struct swap_info_struct *sis = swap_info[type];
1976                                                  1041 
1977                 if (!(sis->flags & SWP_WRITEOK))    1042                 if (!(sis->flags & SWP_WRITEOK))
1978                         continue;                1043                         continue;
1979                                                  1044 
1980                 if (device == sis->bdev->bd_dev) {  !!  1045                 if (!bdev) {
1981                         struct swap_extent *se = first_se(sis);  !!  1046                         if (bdev_p)
                                                   >> 1047                                 *bdev_p = bdgrab(sis->bdev);
                                                   >> 1048 
                                                   >> 1049                         spin_unlock(&swap_lock);
                                                   >> 1050                         return type;
                                                   >> 1051                 }
                                                   >> 1052                 if (bdev == sis->bdev) {
                                                   >> 1053                         struct swap_extent *se = &sis->first_swap_extent;
1982                                                  1054 
1983                         if (se->start_block == offset) {    1055                         if (se->start_block == offset) {
                                                   >> 1056                                 if (bdev_p)
                                                   >> 1057                                         *bdev_p = bdgrab(sis->bdev);
                                                   >> 1058 
1984                                 spin_unlock(&swap_lock);    1059                                 spin_unlock(&swap_lock);
                                                   >> 1060                                 bdput(bdev);
1985                                 return type;     1061                                 return type;
1986                         }                        1062                         }
1987                 }                                1063                 }
1988         }                                        1064         }
1989         spin_unlock(&swap_lock);                 1065         spin_unlock(&swap_lock);
1990         return -ENODEV;                       !! 1066         if (bdev)
1991 }                                             !! 1067                 bdput(bdev);
1992                                               << 
1993 int find_first_swap(dev_t *device)            << 
1994 {                                             << 
1995         int type;                             << 
1996                                               << 
1997         spin_lock(&swap_lock);                << 
1998         for (type = 0; type < nr_swapfiles; type++) {  << 
1999                 struct swap_info_struct *sis = swap_info[type];  << 
2000                                                  1068 
2001                 if (!(sis->flags & SWP_WRITEOK))  << 
2002                         continue;             << 
2003                 *device = sis->bdev->bd_dev;  << 
2004                 spin_unlock(&swap_lock);      << 
2005                 return type;                  << 
2006         }                                     << 
2007         spin_unlock(&swap_lock);              << 
2008         return -ENODEV;                          1069         return -ENODEV;
2009 }                                                1070 }
2010                                                  1071 
2011 /*                                               1072 /*
2012  * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev    1073  * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
2013  * corresponding to given index in swap_info (swap type).    1074  * corresponding to given index in swap_info (swap type).
2014  */                                              1075  */
2015 sector_t swapdev_block(int type, pgoff_t offset)    1076 sector_t swapdev_block(int type, pgoff_t offset)
2016 {                                                1077 {
2017         struct swap_info_struct *si = swap_type_to_swap_info(type);  !!  1078         struct block_device *bdev;
2018         struct swap_extent *se;               << 
2019                                                  1079 
2020         if (!si || !(si->flags & SWP_WRITEOK))  !!  1080         if ((unsigned int)type >= nr_swapfiles)
                                                   >> 1081                 return 0;
                                                   >> 1082         if (!(swap_info[type]->flags & SWP_WRITEOK))
2021                 return 0;                        1083                 return 0;
2022         se = offset_to_swap_extent(si, offset);  !!  1084         return map_swap_entry(swp_entry(type, offset), &bdev);
2023         return se->start_block + (offset - se->start_page);  << 
2024 }                                                1085 }
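/*
 * [Editor's worked example.]  For an extent with start_page == 100 and
 * start_block == 8200, swap offset 130 resolves to disk block
 * 8200 + (130 - 100) == 8230; swapdev_block() is just this extent lookup
 * plus the linear shift.
 */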
2025                                                  1086 
2026 /*                                               1087 /*
2027  * Return either the total number of swap pages of given type, or the number    1088  * Return either the total number of swap pages of given type, or the number
2028  * of free pages of that type (depending on @free)    1089  * of free pages of that type (depending on @free)
2029  *                                               1090  *
2030  * This is needed for software suspend           1091  * This is needed for software suspend
2031  */                                              1092  */
2032 unsigned int count_swap_pages(int type, int free)    1093 unsigned int count_swap_pages(int type, int free)
2033 {                                                1094 {
2034         unsigned int n = 0;                      1095         unsigned int n = 0;
2035                                                  1096 
2036         spin_lock(&swap_lock);                   1097         spin_lock(&swap_lock);
2037         if ((unsigned int)type < nr_swapfiles) {    1098         if ((unsigned int)type < nr_swapfiles) {
2038                 struct swap_info_struct *sis = swap_info[type];    1099                 struct swap_info_struct *sis = swap_info[type];
2039                                                  1100 
2040                 spin_lock(&sis->lock);           1101                 spin_lock(&sis->lock);
2041                 if (sis->flags & SWP_WRITEOK) {    1102                 if (sis->flags & SWP_WRITEOK) {
2042                         n = sis->pages;          1103                         n = sis->pages;
2043                         if (free)                1104                         if (free)
2044                                 n -= sis->inuse_pages;    1105                                 n -= sis->inuse_pages;
2045                 }                                1106                 }
2046                 spin_unlock(&sis->lock);         1107                 spin_unlock(&sis->lock);
2047         }                                        1108         }
2048         spin_unlock(&swap_lock);                 1109         spin_unlock(&swap_lock);
2049         return n;                                1110         return n;
2050 }                                                1111 }
2051 #endif /* CONFIG_HIBERNATION */                  1112 #endif /* CONFIG_HIBERNATION */
2052                                                  1113 
2053 static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte)  !!  1114 static inline int maybe_same_pte(pte_t pte, pte_t swp_pte)
2054 {                                                1115 {
2055         return pte_same(pte_swp_clear_flags(pte), swp_pte);  !!  1116 #ifdef CONFIG_MEM_SOFT_DIRTY
                                                   >> 1117         /*
                                                   >> 1118          * When pte keeps soft dirty bit the pte generated
                                                   >> 1119          * from swap entry does not has it, still it's same
                                                   >> 1120          * pte from logical point of view.
                                                   >> 1121          */
                                                   >> 1122         pte_t swp_pte_dirty = pte_swp_mksoft_dirty(swp_pte);
                                                   >> 1123         return pte_same(pte, swp_pte) || pte_same(pte, swp_pte_dirty);
                                                   >> 1124 #else
                                                   >> 1125         return pte_same(pte, swp_pte);
                                                   >> 1126 #endif
2056 }                                                1127 }
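/*
 * [Editor's note -- illustration only.]  pte_swp_clear_flags() strips the
 * software bits that may legitimately differ between the PTE found in the
 * page table and one freshly built from the swap entry (swp exclusive,
 * soft-dirty and uffd-wp markers, where configured), so the comparison
 * reduces to "do both PTEs name the same swap entry?".
 */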
2057                                                  1128 
2058 /*                                               1129 /*
2059  * No need to decide whether this PTE shares the swap entry with others,    1130  * No need to decide whether this PTE shares the swap entry with others,
2060  * just let do_wp_page work it out if a write is requested later - to    1131  * just let do_wp_page work it out if a write is requested later - to
2061  * force COW, vm_page_prot omits write permission from any private vma.    1132  * force COW, vm_page_prot omits write permission from any private vma.
2062  */                                              1133  */
2063 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,    1134 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
2064                 unsigned long addr, swp_entry_t entry, struct folio *folio)  !!  1135                 unsigned long addr, swp_entry_t entry, struct page *page)
2065 {                                                1136 {
2066         struct page *page;                    !! 1137         struct page *swapcache;
2067         struct folio *swapcache;              !! 1138         struct mem_cgroup *memcg;
2068         spinlock_t *ptl;                         1139         spinlock_t *ptl;
2069         pte_t *pte, new_pte, old_pte;         !! 1140         pte_t *pte;
2070         bool hwpoisoned = false;              << 
2071         int ret = 1;                             1141         int ret = 1;
2072                                                  1142 
2073         swapcache = folio;                    !! 1143         swapcache = page;
2074         folio = ksm_might_need_to_copy(folio, vma, addr);  !!  1144         page = ksm_might_need_to_copy(page, vma, addr);
2075         if (unlikely(!folio))                 !! 1145         if (unlikely(!page))
2076                 return -ENOMEM;                  1146                 return -ENOMEM;
2077         else if (unlikely(folio == ERR_PTR(-EHWPOISON))) {  << 
2078                 hwpoisoned = true;            << 
2079                 folio = swapcache;            << 
2080         }                                     << 
2081                                                  1147 
2082         page = folio_file_page(folio, swp_offset(entry));  !!  1148         if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg)) {
2083         if (PageHWPoison(page))               !! 1149                 ret = -ENOMEM;
2084                 hwpoisoned = true;            !! 1150                 goto out_nolock;
                                                   >> 1151         }
2085                                                  1152 
2086         pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);    1153         pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
2087         if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte),  !!  1154         if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
2088                                               swp_entry_to_pte(entry)))) {  !!  1155                 mem_cgroup_cancel_charge(page, memcg);
2089                 ret = 0;                         1156                 ret = 0;
2090                 goto out;                        1157                 goto out;
2091         }                                        1158         }
2092                                                  1159 
2093         old_pte = ptep_get(pte);              << 
2094                                               << 
2095         if (unlikely(hwpoisoned || !folio_tes << 
2096                 swp_entry_t swp_entry;        << 
2097                                               << 
2098                 dec_mm_counter(vma->vm_mm, MM << 
2099                 if (hwpoisoned) {             << 
2100                         swp_entry = make_hwpo << 
2101                 } else {                      << 
2102                         swp_entry = make_pois << 
2103                 }                             << 
2104                 new_pte = swp_entry_to_pte(sw << 
2105                 ret = 0;                      << 
2106                 goto setpte;                  << 
2107         }                                     << 
2108                                               << 
2109         /*                                    << 
2110          * Some architectures may have to res << 
2111          * when reading from swap. This metad << 
2112          * so this must be called before swap << 
2113          */                                   << 
2114         arch_swap_restore(folio_swap(entry, f << 
2115                                               << 
2116         dec_mm_counter(vma->vm_mm, MM_SWAPENT    1160         dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
2117         inc_mm_counter(vma->vm_mm, MM_ANONPAG    1161         inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
2118         folio_get(folio);                     !! 1162         get_page(page);
2119         if (folio == swapcache) {             !! 1163         set_pte_at(vma->vm_mm, addr, pte,
2120                 rmap_t rmap_flags = RMAP_NONE !! 1164                    pte_mkold(mk_pte(page, vma->vm_page_prot)));
2121                                               !! 1165         if (page == swapcache) {
2122                 /*                            !! 1166                 page_add_anon_rmap(page, vma, addr);
2123                  * See do_swap_page(): writeb !! 1167                 mem_cgroup_commit_charge(page, memcg, true);
2124                  * However, we do a folio_wai << 
2125                  * call and have the folio lo << 
2126                  */                           << 
2127                 VM_BUG_ON_FOLIO(folio_test_wr << 
2128                 if (pte_swp_exclusive(old_pte << 
2129                         rmap_flags |= RMAP_EX << 
2130                 /*                            << 
2131                  * We currently only expect s << 
2132                  * fully exclusive or fully s << 
2133                  * here, we have to be carefu << 
2134                  */                           << 
2135                 if (!folio_test_anon(folio))  << 
2136                         VM_WARN_ON_ONCE(folio << 
2137                         VM_WARN_ON_FOLIO(!fol << 
2138                         folio_add_new_anon_rm << 
2139                 } else {                      << 
2140                         folio_add_anon_rmap_p << 
2141                 }                             << 
2142         } else { /* ksm created a completely     1168         } else { /* ksm created a completely new copy */
2143                 folio_add_new_anon_rmap(folio !! 1169                 page_add_new_anon_rmap(page, vma, addr);
2144                 folio_add_lru_vma(folio, vma) !! 1170                 mem_cgroup_commit_charge(page, memcg, false);
                                                   >> 1171                 lru_cache_add_active_or_unevictable(page, vma);
2145         }                                        1172         }
2146         new_pte = pte_mkold(mk_pte(page, vma- << 
2147         if (pte_swp_soft_dirty(old_pte))      << 
2148                 new_pte = pte_mksoft_dirty(ne << 
2149         if (pte_swp_uffd_wp(old_pte))         << 
2150                 new_pte = pte_mkuffd_wp(new_p << 
2151 setpte:                                       << 
2152         set_pte_at(vma->vm_mm, addr, pte, new << 
2153         swap_free(entry);                        1173         swap_free(entry);
                                                   >> 1174         /*
                                                   >> 1175          * Move the page to the active list so it is not
                                                   >> 1176          * immediately swapped out again after swapon.
                                                   >> 1177          */
                                                   >> 1178         activate_page(page);
2154 out:                                             1179 out:
2155         if (pte)                              !! 1180         pte_unmap_unlock(pte, ptl);
2156                 pte_unmap_unlock(pte, ptl);   !! 1181 out_nolock:
2157         if (folio != swapcache) {             !! 1182         if (page != swapcache) {
2158                 folio_unlock(folio);          !! 1183                 unlock_page(page);
2159                 folio_put(folio);             !! 1184                 put_page(page);
2160         }                                        1185         }
2161         return ret;                              1186         return ret;
2162 }                                                1187 }
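
Two details of the 6.12 unuse_pte() above are worth calling out: the new pte is built with vma->vm_page_prot, which omits write permission on private mappings so a later write still goes through do_wp_page() (the COW comment before the function), and any soft-dirty or uffd-wp marking recorded in the old swap pte is carried over into the new present pte before set_pte_at(). A userspace sketch of the carry-over step only, with invented bit positions (the real ones are per-architecture):

/*
 * Userspace model of the flag carry-over at the end of unuse_pte()
 * (6.12 side).  All bit positions here are made up for the demo.
 */
#include <assert.h>
#include <stdint.h>

typedef uint64_t pte_t;

#define SWP_SOFT_DIRTY	(1ULL << 1)	/* hypothetical swap-pte bits */
#define SWP_UFFD_WP	(1ULL << 2)
#define PTE_SOFT_DIRTY	(1ULL << 58)	/* hypothetical present-pte bits */
#define PTE_UFFD_WP	(1ULL << 57)

/* soft-dirty and uffd-wp recorded in the swap pte must survive */
static pte_t restore_flags(pte_t old_swp_pte, pte_t new_pte)
{
	if (old_swp_pte & SWP_SOFT_DIRTY)
		new_pte |= PTE_SOFT_DIRTY;
	if (old_swp_pte & SWP_UFFD_WP)
		new_pte |= PTE_UFFD_WP;
	return new_pte;
}

int main(void)
{
	pte_t old = SWP_SOFT_DIRTY | SWP_UFFD_WP;
	pte_t new = restore_flags(old, 0);

	assert(new & PTE_SOFT_DIRTY);
	assert(new & PTE_UFFD_WP);
	return 0;
}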
2163                                                  1188 
2164 static int unuse_pte_range(struct vm_area_str    1189 static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
2165                         unsigned long addr, u !! 1190                                 unsigned long addr, unsigned long end,
2166                         unsigned int type)    !! 1191                                 swp_entry_t entry, struct page *page)
2167 {                                                1192 {
2168         pte_t *pte = NULL;                    !! 1193         pte_t swp_pte = swp_entry_to_pte(entry);
2169         struct swap_info_struct *si;          !! 1194         pte_t *pte;
                                                   >> 1195         int ret = 0;
2170                                                  1196 
2171         si = swap_info[type];                 !! 1197         /*
                                                   >> 1198          * We don't actually need pte lock while scanning for swp_pte: since
                                                   >> 1199          * we hold page lock and mmap_sem, swp_pte cannot be inserted into the
                                                   >> 1200          * page table while we're scanning; though it could get zapped, and on
                                                   >> 1201          * some architectures (e.g. x86_32 with PAE) we might catch a glimpse
                                                   >> 1202          * of unmatched parts which look like swp_pte, so unuse_pte must
                                                   >> 1203          * recheck under pte lock.  Scanning without pte lock lets it be
                                                   >> 1204          * preemptible whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE.
                                                   >> 1205          */
                                                   >> 1206         pte = pte_offset_map(pmd, addr);
2172         do {                                     1207         do {
2173                 struct folio *folio;          !! 1208                 /*
2174                 unsigned long offset;         !! 1209                  * swapoff spends a _lot_ of time in this loop!
2175                 unsigned char swp_count;      !! 1210                  * Test inline before going to call unuse_pte.
2176                 swp_entry_t entry;            !! 1211                  */
2177                 int ret;                      !! 1212                 if (unlikely(maybe_same_pte(*pte, swp_pte))) {
2178                 pte_t ptent;                  !! 1213                         pte_unmap(pte);
2179                                               !! 1214                         ret = unuse_pte(vma, pmd, addr, entry, page);
2180                 if (!pte++) {                 !! 1215                         if (ret)
                                                   >> 1216                                 goto out;
2181                         pte = pte_offset_map(    1217                         pte = pte_offset_map(pmd, addr);
2182                         if (!pte)             << 
2183                                 break;        << 
2184                 }                             << 
2185                                               << 
2186                 ptent = ptep_get_lockless(pte << 
2187                                               << 
2188                 if (!is_swap_pte(ptent))      << 
2189                         continue;             << 
2190                                               << 
2191                 entry = pte_to_swp_entry(pten << 
2192                 if (swp_type(entry) != type)  << 
2193                         continue;             << 
2194                                               << 
2195                 offset = swp_offset(entry);   << 
2196                 pte_unmap(pte);               << 
2197                 pte = NULL;                   << 
2198                                               << 
2199                 folio = swap_cache_get_folio( << 
2200                 if (!folio) {                 << 
2201                         struct vm_fault vmf = << 
2202                                 .vma = vma,   << 
2203                                 .address = ad << 
2204                                 .real_address << 
2205                                 .pmd = pmd,   << 
2206                         };                    << 
2207                                               << 
2208                         folio = swapin_readah << 
2209                                               << 
2210                 }                             << 
2211                 if (!folio) {                 << 
2212                         swp_count = READ_ONCE << 
2213                         if (swp_count == 0 || << 
2214                                 continue;     << 
2215                         return -ENOMEM;       << 
2216                 }                             << 
2217                                               << 
2218                 folio_lock(folio);            << 
2219                 folio_wait_writeback(folio);  << 
2220                 ret = unuse_pte(vma, pmd, add << 
2221                 if (ret < 0) {                << 
2222                         folio_unlock(folio);  << 
2223                         folio_put(folio);     << 
2224                         return ret;           << 
2225                 }                                1218                 }
2226                                               !! 1219         } while (pte++, addr += PAGE_SIZE, addr != end);
2227                 folio_free_swap(folio);       !! 1220         pte_unmap(pte - 1);
2228                 folio_unlock(folio);          !! 1221 out:
2229                 folio_put(folio);             !! 1222         return ret;
2230         } while (addr += PAGE_SIZE, addr != e << 
2231                                               << 
2232         if (pte)                              << 
2233                 pte_unmap(pte);               << 
2234         return 0;                             << 
2235 }                                                1223 }
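
The long 4.4 comment in unuse_pte_range() above describes a classic pattern: scan without the pte lock, tolerate false matches, and have unuse_pte() recheck under the lock before acting; the 6.12 loop keeps the same discipline via ptep_get_lockless() plus pte_same_as_swp() under the lock. A minimal pthread sketch of scan-locklessly-then-recheck-under-lock, with invented names:

/*
 * Userspace sketch of "lockless scan, recheck under lock".
 * Single-threaded here; 'volatile' loosely stands in for READ_ONCE().
 */
#include <pthread.h>
#include <stdio.h>

#define NSLOTS 1024

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile int slots[NSLOTS];

/* Act on every slot equal to 'match'; the scan itself takes no lock. */
static int sweep(int match)
{
	int i, acted = 0;

	for (i = 0; i < NSLOTS; i++) {
		if (slots[i] != match)		/* racy test, may be stale */
			continue;
		pthread_mutex_lock(&slot_lock);
		if (slots[i] == match) {	/* recheck under the lock */
			slots[i] = 0;
			acted++;
		}
		pthread_mutex_unlock(&slot_lock);
	}
	return acted;
}

int main(void)
{
	slots[3] = slots[700] = 42;
	printf("acted on %d slots\n", sweep(42));
	return 0;
}

A false hit in the unlocked test costs one lock round-trip; a false miss cannot happen here for the same reason as in the kernel loop, because new matches cannot appear while the sweep is the only writer of that state.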
2236                                                  1224 
2237 static inline int unuse_pmd_range(struct vm_a    1225 static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
2238                                 unsigned long    1226                                 unsigned long addr, unsigned long end,
2239                                 unsigned int  !! 1227                                 swp_entry_t entry, struct page *page)
2240 {                                                1228 {
2241         pmd_t *pmd;                              1229         pmd_t *pmd;
2242         unsigned long next;                      1230         unsigned long next;
2243         int ret;                                 1231         int ret;
2244                                                  1232 
2245         pmd = pmd_offset(pud, addr);             1233         pmd = pmd_offset(pud, addr);
2246         do {                                     1234         do {
2247                 cond_resched();               << 
2248                 next = pmd_addr_end(addr, end    1235                 next = pmd_addr_end(addr, end);
2249                 ret = unuse_pte_range(vma, pm !! 1236                 if (pmd_none_or_trans_huge_or_clear_bad(pmd))
                                                   >> 1237                         continue;
                                                   >> 1238                 ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
2250                 if (ret)                         1239                 if (ret)
2251                         return ret;              1240                         return ret;
2252         } while (pmd++, addr = next, addr !=     1241         } while (pmd++, addr = next, addr != end);
2253         return 0;                                1242         return 0;
2254 }                                                1243 }
2255                                                  1244 
2256 static inline int unuse_pud_range(struct vm_a !! 1245 static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
2257                                 unsigned long    1246                                 unsigned long addr, unsigned long end,
2258                                 unsigned int  !! 1247                                 swp_entry_t entry, struct page *page)
2259 {                                                1248 {
2260         pud_t *pud;                              1249         pud_t *pud;
2261         unsigned long next;                      1250         unsigned long next;
2262         int ret;                                 1251         int ret;
2263                                                  1252 
2264         pud = pud_offset(p4d, addr);          !! 1253         pud = pud_offset(pgd, addr);
2265         do {                                     1254         do {
2266                 next = pud_addr_end(addr, end    1255                 next = pud_addr_end(addr, end);
2267                 if (pud_none_or_clear_bad(pud    1256                 if (pud_none_or_clear_bad(pud))
2268                         continue;                1257                         continue;
2269                 ret = unuse_pmd_range(vma, pu !! 1258                 ret = unuse_pmd_range(vma, pud, addr, next, entry, page);
2270                 if (ret)                         1259                 if (ret)
2271                         return ret;              1260                         return ret;
2272         } while (pud++, addr = next, addr !=     1261         } while (pud++, addr = next, addr != end);
2273         return 0;                                1262         return 0;
2274 }                                                1263 }
2275                                                  1264 
2276 static inline int unuse_p4d_range(struct vm_a !! 1265 static int unuse_vma(struct vm_area_struct *vma,
2277                                 unsigned long !! 1266                                 swp_entry_t entry, struct page *page)
2278                                 unsigned int  << 
2279 {                                             << 
2280         p4d_t *p4d;                           << 
2281         unsigned long next;                   << 
2282         int ret;                              << 
2283                                               << 
2284         p4d = p4d_offset(pgd, addr);          << 
2285         do {                                  << 
2286                 next = p4d_addr_end(addr, end << 
2287                 if (p4d_none_or_clear_bad(p4d << 
2288                         continue;             << 
2289                 ret = unuse_pud_range(vma, p4 << 
2290                 if (ret)                      << 
2291                         return ret;           << 
2292         } while (p4d++, addr = next, addr !=  << 
2293         return 0;                             << 
2294 }                                             << 
2295                                               << 
2296 static int unuse_vma(struct vm_area_struct *v << 
2297 {                                                1267 {
2298         pgd_t *pgd;                              1268         pgd_t *pgd;
2299         unsigned long addr, end, next;           1269         unsigned long addr, end, next;
2300         int ret;                                 1270         int ret;
2301                                                  1271 
2302         addr = vma->vm_start;                 !! 1272         if (page_anon_vma(page)) {
2303         end = vma->vm_end;                    !! 1273                 addr = page_address_in_vma(page, vma);
                                                   >> 1274                 if (addr == -EFAULT)
                                                   >> 1275                         return 0;
                                                   >> 1276                 else
                                                   >> 1277                         end = addr + PAGE_SIZE;
                                                   >> 1278         } else {
                                                   >> 1279                 addr = vma->vm_start;
                                                   >> 1280                 end = vma->vm_end;
                                                   >> 1281         }
2304                                                  1282 
2305         pgd = pgd_offset(vma->vm_mm, addr);      1283         pgd = pgd_offset(vma->vm_mm, addr);
2306         do {                                     1284         do {
2307                 next = pgd_addr_end(addr, end    1285                 next = pgd_addr_end(addr, end);
2308                 if (pgd_none_or_clear_bad(pgd    1286                 if (pgd_none_or_clear_bad(pgd))
2309                         continue;                1287                         continue;
2310                 ret = unuse_p4d_range(vma, pg !! 1288                 ret = unuse_pud_range(vma, pgd, addr, next, entry, page);
2311                 if (ret)                         1289                 if (ret)
2312                         return ret;              1290                         return ret;
2313         } while (pgd++, addr = next, addr !=     1291         } while (pgd++, addr = next, addr != end);
2314         return 0;                                1292         return 0;
2315 }                                                1293 }
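
unuse_vma() and the unuse_p4d/pud/pmd_range() helpers all advance with the same pXd_addr_end() idiom: clamp the next table boundary to 'end', and stay correct even if the boundary computation wraps past the top of the address space. A standalone copy of the idiom, with SPAN standing in for PGDIR_SIZE and friends:

/* Userspace copy of the pXd_addr_end() boundary-clamping idiom. */
#include <assert.h>

#define SPAN	0x1000UL	/* stand-in for PGDIR_SIZE and friends */

static unsigned long toy_addr_end(unsigned long addr, unsigned long end)
{
	unsigned long boundary = (addr + SPAN) & ~(SPAN - 1);

	/* the "- 1" keeps the compare right if boundary wrapped to 0 */
	return boundary - 1 < end - 1 ? boundary : end;
}

int main(void)
{
	/* steps to the next span boundary when end is far away */
	assert(toy_addr_end(0x100, 0x3000) == 0x1000);
	/* clamps to end inside the current span */
	assert(toy_addr_end(0x100, 0x800) == 0x800);
	/* boundary wraps to 0 at the very top: end still wins */
	assert(toy_addr_end(-0x80UL, -0x40UL) == -0x40UL);
	return 0;
}

Without the "- 1" adjustment, a boundary that wrapped to 0 would compare as smaller than any end and the walk would go wrong at the very top of the address space.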
2316                                                  1294 
2317 static int unuse_mm(struct mm_struct *mm, uns !! 1295 static int unuse_mm(struct mm_struct *mm,
                                                   >> 1296                                 swp_entry_t entry, struct page *page)
2318 {                                                1297 {
2319         struct vm_area_struct *vma;              1298         struct vm_area_struct *vma;
2320         int ret = 0;                             1299         int ret = 0;
2321         VMA_ITERATOR(vmi, mm, 0);             << 
2322                                               << 
2323         mmap_read_lock(mm);                   << 
2324         for_each_vma(vmi, vma) {              << 
2325                 if (vma->anon_vma && !is_vm_h << 
2326                         ret = unuse_vma(vma,  << 
2327                         if (ret)              << 
2328                                 break;        << 
2329                 }                             << 
2330                                                  1300 
2331                 cond_resched();               !! 1301         if (!down_read_trylock(&mm->mmap_sem)) {
                                                   >> 1302                 /*
                                                   >> 1303                  * Activate page so shrink_inactive_list is unlikely to unmap
                                                   >> 1304                  * its ptes while lock is dropped, so swapoff can make progress.
                                                   >> 1305                  */
                                                   >> 1306                 activate_page(page);
                                                   >> 1307                 unlock_page(page);
                                                   >> 1308                 down_read(&mm->mmap_sem);
                                                   >> 1309                 lock_page(page);
2332         }                                        1310         }
2333         mmap_read_unlock(mm);                 !! 1311         for (vma = mm->mmap; vma; vma = vma->vm_next) {
2334         return ret;                           !! 1312                 if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
                                                   >> 1313                         break;
                                                   >> 1314         }
                                                   >> 1315         up_read(&mm->mmap_sem);
                                                   >> 1316         return (ret < 0)? ret: 0;
2335 }                                                1317 }
2336                                                  1318 
2337 /*                                               1319 /*
2338  * Scan swap_map from current position to nex !! 1320  * Scan swap_map (or frontswap_map if frontswap parameter is true)
2339  * Return 0 if there are no inuse entries aft !! 1321  * from current position to next entry still in use.
2340  * the map.                                   !! 1322  * Recycle to start on reaching the end, returning 0 when empty.
2341  */                                              1323  */
2342 static unsigned int find_next_to_unuse(struct    1324 static unsigned int find_next_to_unuse(struct swap_info_struct *si,
2343                                         unsig !! 1325                                         unsigned int prev, bool frontswap)
2344 {                                                1326 {
2345         unsigned int i;                       !! 1327         unsigned int max = si->max;
                                                   >> 1328         unsigned int i = prev;
2346         unsigned char count;                     1329         unsigned char count;
2347                                                  1330 
2348         /*                                       1331         /*
2349          * No need for swap_lock here: we're     1332          * No need for swap_lock here: we're just looking
2350          * for whether an entry is in use, no    1333          * for whether an entry is in use, not modifying it; false
2351          * hits are okay, and sys_swapoff() h    1334          * hits are okay, and sys_swapoff() has already prevented new
2352          * allocations from this area (while     1335          * allocations from this area (while holding swap_lock).
2353          */                                      1336          */
2354         for (i = prev + 1; i < si->max; i++)  !! 1337         for (;;) {
                                                   >> 1338                 if (++i >= max) {
                                                   >> 1339                         if (!prev) {
                                                   >> 1340                                 i = 0;
                                                   >> 1341                                 break;
                                                   >> 1342                         }
                                                   >> 1343                         /*
                                                   >> 1344                          * No entries in use at top of swap_map,
                                                   >> 1345                          * loop back to start and recheck there.
                                                   >> 1346                          */
                                                   >> 1347                         max = prev + 1;
                                                   >> 1348                         prev = 0;
                                                   >> 1349                         i = 1;
                                                   >> 1350                 }
                                                   >> 1351                 if (frontswap) {
                                                   >> 1352                         if (frontswap_test(si, i))
                                                   >> 1353                                 break;
                                                   >> 1354                         else
                                                   >> 1355                                 continue;
                                                   >> 1356                 }
2355                 count = READ_ONCE(si->swap_ma    1357                 count = READ_ONCE(si->swap_map[i]);
2356                 if (count && swap_count(count    1358                 if (count && swap_count(count) != SWAP_MAP_BAD)
2357                         break;                   1359                         break;
2358                 if ((i % LATENCY_LIMIT) == 0) << 
2359                         cond_resched();       << 
2360         }                                        1360         }
2361                                               << 
2362         if (i == si->max)                     << 
2363                 i = 0;                        << 
2364                                               << 
2365         return i;                                1361         return i;
2366 }                                                1362 }
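
As the comment in find_next_to_unuse() says, the scan takes no lock: false hits are fine because sys_swapoff() has already stopped new allocations from the area and the caller revalidates each hit. A userspace sketch of the 6.12-style loop, using C11 relaxed atomic loads where the kernel uses READ_ONCE(); the array size and the MAP_BAD value are invented stand-ins:

/* Userspace sketch of the lockless swap_map scan (6.12 shape). */
#include <stdatomic.h>
#include <stdio.h>

#define MAP_MAX	64
#define MAP_BAD	0x3f		/* stand-in for SWAP_MAP_BAD */

static _Atomic unsigned char swap_map[MAP_MAX];

static unsigned int next_in_use(unsigned int prev)
{
	unsigned int i;

	for (i = prev + 1; i < MAP_MAX; i++) {
		unsigned char count =
			atomic_load_explicit(&swap_map[i],
					     memory_order_relaxed);
		if (count && count != MAP_BAD)
			return i;	/* may be stale: caller rechecks */
	}
	return 0;			/* 0 means "nothing left in use" */
}

int main(void)
{
	swap_map[5] = MAP_BAD;		/* skipped, like a discarding cluster */
	swap_map[9] = 1;		/* found */
	printf("next in use: %u\n", next_in_use(0));
	return 0;
}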
2367                                                  1363 
2368 static int try_to_unuse(unsigned int type)    !! 1364 /*
                                                   >> 1365  * We completely avoid races by reading each swap page in advance,
                                                   >> 1366  * and then search for the process using it.  All the necessary
                                                   >> 1367  * page table adjustments can then be made atomically.
                                                   >> 1368  *
                                                   >> 1369  * if the boolean frontswap is true, only unuse pages_to_unuse pages;
                                                   >> 1370  * pages_to_unuse==0 means all pages; ignored if frontswap is false
                                                   >> 1371  */
                                                   >> 1372 int try_to_unuse(unsigned int type, bool frontswap,
                                                   >> 1373                  unsigned long pages_to_unuse)
2369 {                                                1374 {
2370         struct mm_struct *prev_mm;            << 
2371         struct mm_struct *mm;                 << 
2372         struct list_head *p;                  << 
2373         int retval = 0;                       << 
2374         struct swap_info_struct *si = swap_in    1375         struct swap_info_struct *si = swap_info[type];
2375         struct folio *folio;                  !! 1376         struct mm_struct *start_mm;
                                                   >> 1377         volatile unsigned char *swap_map; /* swap_map is accessed without
                                                   >> 1378                                            * locking. Mark it as volatile
                                                   >> 1379                                            * to prevent the compiler from
                                                   >> 1380                                            * doing something odd.
                                                   >> 1381                                            */
                                                   >> 1382         unsigned char swcount;
                                                   >> 1383         struct page *page;
2376         swp_entry_t entry;                       1384         swp_entry_t entry;
2377         unsigned int i;                       !! 1385         unsigned int i = 0;
                                                   >> 1386         int retval = 0;
2378                                                  1387 
2379         if (!READ_ONCE(si->inuse_pages))      !! 1388         /*
2380                 goto success;                 !! 1389          * When searching mms for an entry, a good strategy is to
                                                   >> 1390          * start at the first mm we freed the previous entry from
                                                   >> 1391          * (though actually we don't notice whether we or coincidence
                                                   >> 1392          * freed the entry).  Initialize this start_mm with a hold.
                                                   >> 1393          *
                                                   >> 1394          * A simpler strategy would be to start at the last mm we
                                                   >> 1395          * freed the previous entry from; but that would take less
                                                   >> 1396          * advantage of mmlist ordering, which clusters forked mms
                                                   >> 1397          * together, child after parent.  If we race with dup_mmap(), we
                                                   >> 1398          * prefer to resolve parent before child, lest we miss entries
                                                   >> 1399          * duplicated after we scanned child: using last mm would invert
                                                   >> 1400          * that.
                                                   >> 1401          */
                                                   >> 1402         start_mm = &init_mm;
                                                   >> 1403         atomic_inc(&init_mm.mm_users);
                                                   >> 1404 
                                                   >> 1405         /*
                                                   >> 1406          * Keep on scanning until all entries have gone.  Usually,
                                                   >> 1407          * one pass through swap_map is enough, but not necessarily:
                                                   >> 1408          * there are races when an instance of an entry might be missed.
                                                   >> 1409          */
                                                   >> 1410         while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
                                                   >> 1411                 if (signal_pending(current)) {
                                                   >> 1412                         retval = -EINTR;
                                                   >> 1413                         break;
                                                   >> 1414                 }
2381                                                  1415 
2382 retry:                                        !! 1416                 /*
2383         retval = shmem_unuse(type);           !! 1417                  * Get a page for the entry, using the existing swap
2384         if (retval)                           !! 1418                  * cache page if there is one.  Otherwise, get a clean
2385                 return retval;                !! 1419                  * page and read the swap into it.
                                                   >> 1420                  */
                                                   >> 1421                 swap_map = &si->swap_map[i];
                                                   >> 1422                 entry = swp_entry(type, i);
                                                   >> 1423                 page = read_swap_cache_async(entry,
                                                   >> 1424                                         GFP_HIGHUSER_MOVABLE, NULL, 0);
                                                   >> 1425                 if (!page) {
                                                   >> 1426                         /*
                                                   >> 1427                          * Either swap_duplicate() failed because entry
                                                   >> 1428                          * has been freed independently, and will not be
                                                   >> 1429                          * reused since sys_swapoff() already disabled
                                                   >> 1430                          * allocation from here, or alloc_page() failed.
                                                   >> 1431                          */
                                                   >> 1432                         swcount = *swap_map;
                                                   >> 1433                         /*
                                                   >> 1434                          * We don't hold the lock here, so the swap entry could be
                                                   >> 1435                          * SWAP_MAP_BAD (when the cluster is being discarded).
                                                   >> 1436                          * Instead of failing out, we can just skip the swap
                                                   >> 1437                          * entry, because swapoff will wait for the discard to
                                                   >> 1438                          * finish anyway.
                                                   >> 1439                          */
                                                   >> 1440                         if (!swcount || swcount == SWAP_MAP_BAD)
                                                   >> 1441                                 continue;
                                                   >> 1442                         retval = -ENOMEM;
                                                   >> 1443                         break;
                                                   >> 1444                 }
2386                                                  1445 
2387         prev_mm = &init_mm;                   !! 1446                 /*
2388         mmget(prev_mm);                       !! 1447                  * Don't hold on to start_mm if it looks like exiting.
                                                   >> 1448                  */
                                                   >> 1449                 if (atomic_read(&start_mm->mm_users) == 1) {
                                                   >> 1450                         mmput(start_mm);
                                                   >> 1451                         start_mm = &init_mm;
                                                   >> 1452                         atomic_inc(&init_mm.mm_users);
                                                   >> 1453                 }
2389                                                  1454 
2390         spin_lock(&mmlist_lock);              !! 1455                 /*
2391         p = &init_mm.mmlist;                  !! 1456                  * Wait for and lock page.  When do_swap_page races with
2392         while (READ_ONCE(si->inuse_pages) &&  !! 1457                  * try_to_unuse, do_swap_page can handle the fault much
2393                !signal_pending(current) &&    !! 1458                  * faster than try_to_unuse can locate the entry.  This
2394                (p = p->next) != &init_mm.mmli !! 1459                  * apparently redundant "wait_on_page_locked" lets try_to_unuse
                                                   >> 1460                  * defer to do_swap_page in such a case - in some tests,
                                                   >> 1461                  * do_swap_page and try_to_unuse repeatedly compete.
                                                   >> 1462                  */
                                                   >> 1463                 wait_on_page_locked(page);
                                                   >> 1464                 wait_on_page_writeback(page);
                                                   >> 1465                 lock_page(page);
                                                   >> 1466                 wait_on_page_writeback(page);
2395                                                  1467 
2396                 mm = list_entry(p, struct mm_ !! 1468                 /*
2397                 if (!mmget_not_zero(mm))      !! 1469                  * Remove all references to entry.
                                                   >> 1470                  */
                                                   >> 1471                 swcount = *swap_map;
                                                   >> 1472                 if (swap_count(swcount) == SWAP_MAP_SHMEM) {
                                                   >> 1473                         retval = shmem_unuse(entry, page);
                                                   >> 1474                         /* page has already been unlocked and released */
                                                   >> 1475                         if (retval < 0)
                                                   >> 1476                                 break;
2398                         continue;                1477                         continue;
2399                 spin_unlock(&mmlist_lock);    !! 1478                 }
2400                 mmput(prev_mm);               !! 1479                 if (swap_count(swcount) && start_mm != &init_mm)
2401                 prev_mm = mm;                 !! 1480                         retval = unuse_mm(start_mm, entry, page);
2402                 retval = unuse_mm(mm, type);  !! 1481 
2403                 if (retval) {                 !! 1482                 if (swap_count(*swap_map)) {
                                                   >> 1483                         int set_start_mm = (*swap_map >= swcount);
                                                   >> 1484                         struct list_head *p = &start_mm->mmlist;
                                                   >> 1485                         struct mm_struct *new_start_mm = start_mm;
                                                   >> 1486                         struct mm_struct *prev_mm = start_mm;
                                                   >> 1487                         struct mm_struct *mm;
                                                   >> 1488 
                                                   >> 1489                         atomic_inc(&new_start_mm->mm_users);
                                                   >> 1490                         atomic_inc(&prev_mm->mm_users);
                                                   >> 1491                         spin_lock(&mmlist_lock);
                                                   >> 1492                         while (swap_count(*swap_map) && !retval &&
                                                   >> 1493                                         (p = p->next) != &start_mm->mmlist) {
                                                   >> 1494                                 mm = list_entry(p, struct mm_struct, mmlist);
                                                   >> 1495                                 if (!atomic_inc_not_zero(&mm->mm_users))
                                                   >> 1496                                         continue;
                                                   >> 1497                                 spin_unlock(&mmlist_lock);
                                                   >> 1498                                 mmput(prev_mm);
                                                   >> 1499                                 prev_mm = mm;
                                                   >> 1500 
                                                   >> 1501                                 cond_resched();
                                                   >> 1502 
                                                   >> 1503                                 swcount = *swap_map;
                                                   >> 1504                                 if (!swap_count(swcount)) /* any usage ? */
                                                   >> 1505                                         ;
                                                   >> 1506                                 else if (mm == &init_mm)
                                                   >> 1507                                         set_start_mm = 1;
                                                   >> 1508                                 else
                                                   >> 1509                                         retval = unuse_mm(mm, entry, page);
                                                   >> 1510 
                                                   >> 1511                                 if (set_start_mm && *swap_map < swcount) {
                                                   >> 1512                                         mmput(new_start_mm);
                                                   >> 1513                                         atomic_inc(&mm->mm_users);
                                                   >> 1514                                         new_start_mm = mm;
                                                   >> 1515                                         set_start_mm = 0;
                                                   >> 1516                                 }
                                                   >> 1517                                 spin_lock(&mmlist_lock);
                                                   >> 1518                         }
                                                   >> 1519                         spin_unlock(&mmlist_lock);
2404                         mmput(prev_mm);          1520                         mmput(prev_mm);
2405                         return retval;        !! 1521                         mmput(start_mm);
                                                   >> 1522                         start_mm = new_start_mm;
                                                   >> 1523                 }
                                                   >> 1524                 if (retval) {
                                                   >> 1525                         unlock_page(page);
                                                   >> 1526                         page_cache_release(page);
                                                   >> 1527                         break;
2406                 }                                1528                 }
2407                                                  1529 
2408                 /*                               1530                 /*
2409                  * Make sure that we aren't c !! 1531                  * If a reference remains (rare), we would like to leave
2410                  * interactive performance.   !! 1532                  * the page in the swap cache; but try_to_unmap could
2411                  */                           !! 1533                  * then re-duplicate the entry once we drop page lock,
2412                 cond_resched();               !! 1534                  * so we might loop indefinitely; also, that page could
2413                 spin_lock(&mmlist_lock);      !! 1535                  * not be swapped out to other storage meanwhile.  So:
2414         }                                     !! 1536                  * delete from cache even if there's another reference,
2415         spin_unlock(&mmlist_lock);            !! 1537                  * after ensuring that the data has been saved to disk -
2416                                               !! 1538                  * since if the reference remains (rarer), it will be
2417         mmput(prev_mm);                       !! 1539                  * read from disk into another page.  Splitting into two
                                                   >> 1540                  * pages would be incorrect if swap supported "shared
                                                   >> 1541                  * private" pages, but they are handled by tmpfs files.
                                                   >> 1542                  *
                                                   >> 1543                  * Given how unuse_vma() targets one particular offset
                                                   >> 1544                  * in an anon_vma, once the anon_vma has been determined,
                                                   >> 1545                  * this splitting happens to be just what is needed to
                                                   >> 1546                  * handle where KSM pages have been swapped out: re-reading
                                                   >> 1547                  * is unnecessarily slow, but we can fix that later on.
                                                   >> 1548                  */
                                                   >> 1549                 if (swap_count(*swap_map) &&
                                                   >> 1550                      PageDirty(page) && PageSwapCache(page)) {
                                                   >> 1551                         struct writeback_control wbc = {
                                                   >> 1552                                 .sync_mode = WB_SYNC_NONE,
                                                   >> 1553                         };
2418                                                  1554 
2419         i = 0;                                !! 1555                         swap_writepage(page, &wbc);
2420         while (READ_ONCE(si->inuse_pages) &&  !! 1556                         lock_page(page);
2421                !signal_pending(current) &&    !! 1557                         wait_on_page_writeback(page);
2422                (i = find_next_to_unuse(si, i) !! 1558                 }
2423                                                  1559 
2424                 entry = swp_entry(type, i);   !! 1560                 /*
2425                 folio = filemap_get_folio(swa !! 1561                  * It is conceivable that a racing task removed this page from
2426                 if (IS_ERR(folio))            !! 1562                  * swap cache just before we acquired the page lock at the top,
2427                         continue;             !! 1563                  * or while we dropped it in unuse_mm().  The page might even
                                                   >> 1564                  * be back in swap cache on another swap area: that we must not
                                                   >> 1565                  * delete, since it may not have been written out to swap yet.
                                                   >> 1566                  */
                                                   >> 1567                 if (PageSwapCache(page) &&
                                                   >> 1568                     likely(page_private(page) == entry.val))
                                                   >> 1569                         delete_from_swap_cache(page);
2428                                                  1570 
2429                 /*                               1571                 /*
2430                  * It is conceivable that a r !! 1572                  * So we could skip searching mms once swap count went
2431                  * swap cache just before we  !! 1573                  * to 1, we did not mark any present ptes as dirty: must
2432                  * might even be back in swap !! 1574                  * mark page dirty so shrink_page_list will preserve it.
2433                  * that is okay, folio_free_s !! 1575                  */
2434                  */                           !! 1576                 SetPageDirty(page);
2435                 folio_lock(folio);            !! 1577                 unlock_page(page);
2436                 folio_wait_writeback(folio);  !! 1578                 page_cache_release(page);
2437                 folio_free_swap(folio);       << 
2438                 folio_unlock(folio);          << 
2439                 folio_put(folio);             << 
2440         }                                     << 
2441                                                  1579 
2442         /*                                    !! 1580                 /*
2443          * Let's check again to see if there !! 1581                  * Make sure that we aren't completely killing
2444          * If yes, we need to retry the unuse !! 1582                  * interactive performance.
2445          * Under global memory pressure, swap !! 1583                  */
2446          * into process space after the mmlis !! 1584                 cond_resched();
2447          *                                    !! 1585                 if (frontswap && pages_to_unuse > 0) {
2448          * Limit the number of retries? No: w !! 1586                         if (!--pages_to_unuse)
2449          * above fails, that mm is likely to  !! 1587                                 break;
2450          * exit_mmap(), which proceeds at its !! 1588                 }
2451          * and even shmem_writepage() could h << 
2452          * folio_alloc_swap(), temporarily hi << 
2453          * and robust (though cpu-intensive)  << 
2454          */                                   << 
2455         if (READ_ONCE(si->inuse_pages)) {     << 
2456                 if (!signal_pending(current)) << 
2457                         goto retry;           << 
2458                 return -EINTR;                << 
2459         }                                        1589         }
2460                                                  1590 
2461 success:                                      !! 1591         mmput(start_mm);
2462         /*                                    !! 1592         return retval;
2463          * Make sure that further cleanups af << 
2464          * after swap_range_free() reduces si << 
2465          */                                   << 
2466         smp_mb();                             << 
2467         return 0;                             << 
2468 }                                                1593 }
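
The mmlist walk in the 6.12 try_to_unuse() above is a hand-over-hand traversal: each mm is pinned with mmget_not_zero() before mmlist_lock is dropped, and the previous mm is released only after the next one is pinned, so the walker never loses its place while unuse_mm() sleeps. A refcounted-list sketch of the pattern; every name is invented and nothing is ever really freed in this toy:

/* Userspace sketch of the prev_mm hand-over-hand traversal. */
#include <pthread.h>
#include <stdio.h>

struct node {
	struct node *next;
	int refs;			/* protected by list_lock here */
	int id;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* like mmget_not_zero(): pin only if still alive; caller holds list_lock */
static int get_not_zero(struct node *n)
{
	return n->refs ? (n->refs++, 1) : 0;
}

/* like mmput(): may sleep, so callers drop list_lock first */
static void put(struct node *n)
{
	pthread_mutex_lock(&list_lock);
	n->refs--;			/* a real put would free at zero */
	pthread_mutex_unlock(&list_lock);
}

static void walk(struct node *head)
{
	struct node *prev = NULL, *n;

	pthread_mutex_lock(&list_lock);
	for (n = head; n; n = n->next) {
		if (!get_not_zero(n))	/* dying node: skip it */
			continue;
		pthread_mutex_unlock(&list_lock);
		if (prev)
			put(prev);	/* safe: 'n' now holds our place */
		prev = n;
		printf("visit %d\n", n->id);	/* work with locks dropped */
		pthread_mutex_lock(&list_lock);
	}
	pthread_mutex_unlock(&list_lock);
	if (prev)
		put(prev);
}

int main(void)
{
	struct node c = { NULL, 1, 3 }, b = { &c, 0, 2 }, a = { &b, 1, 1 };

	walk(&a);			/* visits 1 and 3, skips dying 2 */
	return 0;
}

The same shape appears wherever the kernel walks a long-lived list while sleeping per element; releasing the old reference before taking the next one is the classic bug this ordering avoids.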
2469                                                  1594 
2470 /*                                               1595 /*
2471  * After a successful try_to_unuse, if no swa    1596  * After a successful try_to_unuse, if no swap is now in use, we know
2472  * we can empty the mmlist.  swap_lock must b    1597  * we can empty the mmlist.  swap_lock must be held on entry and exit.
2473  * Note that mmlist_lock nests inside swap_lo    1598  * Note that mmlist_lock nests inside swap_lock, and an mm must be
2474  * added to the mmlist just after page_duplic    1599  * added to the mmlist just after page_duplicate - before would be racy.
2475  */                                              1600  */
2476 static void drain_mmlist(void)                   1601 static void drain_mmlist(void)
2477 {                                                1602 {
2478         struct list_head *p, *next;              1603         struct list_head *p, *next;
2479         unsigned int type;                       1604         unsigned int type;
2480                                                  1605 
2481         for (type = 0; type < nr_swapfiles; t    1606         for (type = 0; type < nr_swapfiles; type++)
2482                 if (swap_info[type]->inuse_pa    1607                 if (swap_info[type]->inuse_pages)
2483                         return;                  1608                         return;
2484         spin_lock(&mmlist_lock);                 1609         spin_lock(&mmlist_lock);
2485         list_for_each_safe(p, next, &init_mm.    1610         list_for_each_safe(p, next, &init_mm.mmlist)
2486                 list_del_init(p);                1611                 list_del_init(p);
2487         spin_unlock(&mmlist_lock);               1612         spin_unlock(&mmlist_lock);
2488 }                                                1613 }
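
The drain_mmlist() comment states a lock-ordering rule: mmlist_lock nests inside swap_lock, so any path that needs both must take swap_lock first and release in the reverse order. A two-mutex sketch of what that nesting means; the names mirror the kernel's, but these are ordinary pthread mutexes:

/* Userspace sketch of "mmlist_lock nests inside swap_lock". */
#include <pthread.h>

static pthread_mutex_t swap_lock   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mmlist_lock = PTHREAD_MUTEX_INITIALIZER;

static void need_both(void)
{
	pthread_mutex_lock(&swap_lock);		/* outer lock first */
	pthread_mutex_lock(&mmlist_lock);	/* inner lock second */
	/* ... work that needs both ... */
	pthread_mutex_unlock(&mmlist_lock);
	pthread_mutex_unlock(&swap_lock);
}

int main(void)
{
	need_both();
	return 0;
}

Taking them in the opposite order on any path would create the textbook ABBA deadlock with a thread running the order above.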
2489                                                  1614 
2490 /*                                               1615 /*
                                                   >> 1616  * Use this swapdev's extent info to locate the (PAGE_SIZE) block which
                                                   >> 1617  * corresponds to page offset for the specified swap entry.
                                                   >> 1618  * Note that the type of this function is sector_t, but it returns page offset
                                                   >> 1619  * into the bdev, not sector offset.
                                                   >> 1620  */
                                                   >> 1621 static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev)
                                                   >> 1622 {
                                                   >> 1623         struct swap_info_struct *sis;
                                                   >> 1624         struct swap_extent *start_se;
                                                   >> 1625         struct swap_extent *se;
                                                   >> 1626         pgoff_t offset;
                                                   >> 1627 
                                                   >> 1628         sis = swap_info[swp_type(entry)];
                                                   >> 1629         *bdev = sis->bdev;
                                                   >> 1630 
                                                   >> 1631         offset = swp_offset(entry);
                                                   >> 1632         start_se = sis->curr_swap_extent;
                                                   >> 1633         se = start_se;
                                                   >> 1634 
                                                   >> 1635         for ( ; ; ) {
                                                   >> 1636                 struct list_head *lh;
                                                   >> 1637 
                                                   >> 1638                 if (se->start_page <= offset &&
                                                   >> 1639                                 offset < (se->start_page + se->nr_pages)) {
                                                   >> 1640                         return se->start_block + (offset - se->start_page);
                                                   >> 1641                 }
                                                   >> 1642                 lh = se->list.next;
                                                   >> 1643                 se = list_entry(lh, struct swap_extent, list);
                                                   >> 1644                 sis->curr_swap_extent = se;
                                                   >> 1645                 BUG_ON(se == start_se);         /* It *must* be present */
                                                   >> 1646         }
                                                   >> 1647 }
                                                   >> 1648 
                                                   >> 1649 /*
                                                   >> 1650  * Returns the page offset into bdev for the specified page's swap entry.
                                                   >> 1651  */
                                                   >> 1652 sector_t map_swap_page(struct page *page, struct block_device **bdev)
                                                   >> 1653 {
                                                   >> 1654         swp_entry_t entry;
                                                   >> 1655         entry.val = page_private(page);
                                                   >> 1656         return map_swap_entry(entry, bdev) << (PAGE_SHIFT - 9);
                                                   >> 1657 }
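
/*
 * Sketch (userspace, illustrative) of the extent lookup that
 * map_swap_entry() performs above: given extents sorted by start_page, a
 * page offset inside [start_page, start_page + nr_pages) maps to
 * start_block plus the delta.  The names (struct extent, lookup_block)
 * are invented for the example, not kernel API.
 */
#include <stdio.h>

struct extent { unsigned long start_page, nr_pages, start_block; };

static long lookup_block(const struct extent *tbl, int n, unsigned long offset)
{
	for (int i = 0; i < n; i++)
		if (tbl[i].start_page <= offset &&
		    offset < tbl[i].start_page + tbl[i].nr_pages)
			return tbl[i].start_block + (offset - tbl[i].start_page);
	return -1;	/* the kernel BUG()s instead: the extent must exist */
}

int main(void)
{
	const struct extent tbl[] = {
		{ 0,   64, 1000 },	/* pages 0..63   -> blocks 1000..1063 */
		{ 64, 128, 5000 },	/* pages 64..191 -> blocks 5000..5127 */
	};

	printf("page 70 -> block %ld\n", lookup_block(tbl, 2, 70)); /* 5006 */
	return 0;
}
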
                                                   >> 1658 
                                                   >> 1659 /*
2491  * Free all of a swapdev's extent information    1660  * Free all of a swapdev's extent information
2492  */                                              1661  */
2493 static void destroy_swap_extents(struct swap_    1662 static void destroy_swap_extents(struct swap_info_struct *sis)
2494 {                                                1663 {
2495         while (!RB_EMPTY_ROOT(&sis->swap_extent_root)) { !! 1664         while (!list_empty(&sis->first_swap_extent.list)) {
2496                 struct rb_node *rb = sis->swap_extent_root.rb_node; !! 1665                 struct swap_extent *se;
2497                 struct swap_extent *se = rb_entry(rb, struct swap_extent, rb_node); << 
2498                                                  1666 
2499                 rb_erase(rb, &sis->swap_extent_root); !! 1667                 se = list_entry(sis->first_swap_extent.list.next,
                                                   >> 1668                                 struct swap_extent, list);
                                                   >> 1669                 list_del(&se->list);
2500                 kfree(se);                       1670                 kfree(se);
2501         }                                        1671         }
2502                                                  1672 
2503         if (sis->flags & SWP_ACTIVATED) {     !! 1673         if (sis->flags & SWP_FILE) {
2504                 struct file *swap_file = sis-    1674                 struct file *swap_file = sis->swap_file;
2505                 struct address_space *mapping    1675                 struct address_space *mapping = swap_file->f_mapping;
2506                                                  1676 
2507                 sis->flags &= ~SWP_ACTIVATED; !! 1677                 sis->flags &= ~SWP_FILE;
2508                 if (mapping->a_ops->swap_deactivate) !! 1678                 mapping->a_ops->swap_deactivate(swap_file);
2509                         mapping->a_ops->swap_deactivate(swap_file); << 
2510         }                                        1679         }
2511 }                                                1680 }
2512                                                  1681 
2513 /*                                               1682 /*
2514  * Add a block range (and the corresponding p    1683  * Add a block range (and the corresponding page range) into this swapdev's
2515  * extent tree.                               !! 1684  * extent list.  The extent list is kept sorted in page order.
2516  *                                               1685  *
2517  * This function rather assumes that it is ca    1686  * This function rather assumes that it is called in ascending page order.
2518  */                                              1687  */
2519 int                                              1688 int
2520 add_swap_extent(struct swap_info_struct *sis,    1689 add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
2521                 unsigned long nr_pages, secto    1690                 unsigned long nr_pages, sector_t start_block)
2522 {                                                1691 {
2523         struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL; << 
2524         struct swap_extent *se;                  1692         struct swap_extent *se;
2525         struct swap_extent *new_se;              1693         struct swap_extent *new_se;
                                                   >> 1694         struct list_head *lh;
2526                                                  1695 
2527         /*                                    !! 1696         if (start_page == 0) {
2528          * place the new node at the right most since the !! 1697                 se = &sis->first_swap_extent;
2529          * function is called in ascending page order. !! 1698                 sis->curr_swap_extent = se;
2530          */                                   !! 1699                 se->start_page = 0;
2531         while (*link) {                       !! 1700                 se->nr_pages = nr_pages;
2532                 parent = *link;               !! 1701                 se->start_block = start_block;
2533                 link = &parent->rb_right;     !! 1702                 return 1;
2534         }                                     !! 1703         } else {
2535                                               !! 1704                 lh = sis->first_swap_extent.list.prev;  /* Highest extent */
2536         if (parent) {                         !! 1705                 se = list_entry(lh, struct swap_extent, list);
2537                 se = rb_entry(parent, struct swap_extent, rb_node); << 
2538                 BUG_ON(se->start_page + se->n    1706                 BUG_ON(se->start_page + se->nr_pages != start_page);
2539                 if (se->start_block + se->nr_    1707                 if (se->start_block + se->nr_pages == start_block) {
2540                         /* Merge it */           1708                         /* Merge it */
2541                         se->nr_pages += nr_pa    1709                         se->nr_pages += nr_pages;
2542                         return 0;                1710                         return 0;
2543                 }                                1711                 }
2544         }                                        1712         }
2545                                                  1713 
2546         /* No merge, insert a new extent. */  !! 1714         /*
                                                   >> 1715          * No merge.  Insert a new extent, preserving ordering.
                                                   >> 1716          */
2547         new_se = kmalloc(sizeof(*se), GFP_KER    1717         new_se = kmalloc(sizeof(*se), GFP_KERNEL);
2548         if (new_se == NULL)                      1718         if (new_se == NULL)
2549                 return -ENOMEM;                  1719                 return -ENOMEM;
2550         new_se->start_page = start_page;         1720         new_se->start_page = start_page;
2551         new_se->nr_pages = nr_pages;             1721         new_se->nr_pages = nr_pages;
2552         new_se->start_block = start_block;       1722         new_se->start_block = start_block;
2553                                                  1723 
2554         rb_link_node(&new_se->rb_node, parent, link); !! 1724         list_add_tail(&new_se->list, &sis->first_swap_extent.list);
2555         rb_insert_color(&new_se->rb_node, &sis->swap_extent_root); << 
2556         return 1;                                1725         return 1;
2557 }                                                1726 }
2558 EXPORT_SYMBOL_GPL(add_swap_extent);           << 
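
/*
 * Sketch of the merge rule in add_swap_extent(): a new run that starts
 * exactly where the previous extent ends, both in page space and in block
 * space, just grows that extent instead of allocating a new node.
 * Userspace, array-backed; the rbtree bookkeeping is elided.
 */
#include <stdio.h>

struct extent { unsigned long start_page, nr_pages, start_block; };

static int add_extent(struct extent *tbl, int *n, unsigned long start_page,
		      unsigned long nr_pages, unsigned long start_block)
{
	if (*n > 0) {
		struct extent *last = &tbl[*n - 1];

		if (last->start_page + last->nr_pages == start_page &&
		    last->start_block + last->nr_pages == start_block) {
			last->nr_pages += nr_pages;	/* merge */
			return 0;
		}
	}
	tbl[(*n)++] = (struct extent){ start_page, nr_pages, start_block };
	return 1;	/* new extent, mirroring the kernel's return value */
}

int main(void)
{
	struct extent tbl[4];
	int n = 0;

	add_extent(tbl, &n, 0, 16, 100);
	add_extent(tbl, &n, 16, 16, 116);	/* contiguous: merges */
	add_extent(tbl, &n, 32, 16, 900);	/* block gap: new extent */
	printf("%d extents, first covers %lu pages\n", n, tbl[0].nr_pages); /* 2, 32 */
	return 0;
}
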
2559                                                  1727 
2560 /*                                               1728 /*
2561  * A `swap extent' is a simple thing which ma    1729  * A `swap extent' is a simple thing which maps a contiguous range of pages
2562  * onto a contiguous range of disk blocks.  A rbtree of swap extents is !! 1730  * onto a contiguous range of disk blocks.  An ordered list of swap extents
2563  * built at swapon time and is then used at swap_writepage/swap_read_folio !! 1731  * is built at swapon time and is then used at swap_writepage/swap_readpage
2564  * time for locating where on disk a page bel    1732  * time for locating where on disk a page belongs.
2565  *                                               1733  *
2566  * If the swapfile is an S_ISBLK block device    1734  * If the swapfile is an S_ISBLK block device, a single extent is installed.
2567  * This is done so that the main operating co    1735  * This is done so that the main operating code can treat S_ISBLK and S_ISREG
2568  * swap files identically.                       1736  * swap files identically.
2569  *                                               1737  *
2570  * Whether the swapdev is an S_ISREG file or     1738  * Whether the swapdev is an S_ISREG file or an S_ISBLK blockdev, the swap
2571  * extent rbtree operates in PAGE_SIZE disk blocks.  Both S_ISREG and S_ISBLK !! 1739  * extent list operates in PAGE_SIZE disk blocks.  Both S_ISREG and S_ISBLK
2572  * swapfiles are handled *identically* after     1740  * swapfiles are handled *identically* after swapon time.
2573  *                                               1741  *
2574  * For S_ISREG swapfiles, setup_swap_extents(    1742  * For S_ISREG swapfiles, setup_swap_extents() will walk all the file's blocks
2575  * and will parse them into a rbtree, in PAGE_SIZE chunks.  If some stray !! 1743  * and will parse them into an ordered extent list, in PAGE_SIZE chunks.  If
2576  * blocks are found which do not fall within the PAGE_SIZE alignment !! 1744  * some stray blocks are found which do not fall within the PAGE_SIZE alignment
2577  * requirements, they are simply tossed out -    1745  * requirements, they are simply tossed out - we will never use those blocks
2578  * for swapping.                                 1746  * for swapping.
2579  *                                               1747  *
2580  * For all swap devices we set S_SWAPFILE across the life of the swapon.  This !! 1748  * For S_ISREG swapfiles we set S_SWAPFILE across the life of the swapon.  This
2581  * prevents users from writing to the swap device resulting in corrupted swap. !! 1749  * prevents root from shooting her foot off by ftruncating an in-use swapfile,
                                                   >> 1750  * which will scribble on the fs.
2582  *                                               1751  *
2583  * The amount of disk space which a single sw    1752  * The amount of disk space which a single swap extent represents varies.
2584  * Typically it is in the 1-4 megabyte range.    1753  * Typically it is in the 1-4 megabyte range.  So we can have hundreds of
2585  * extents in the rbtree. - akpm.             !! 1754  * extents in the list.  To avoid much list walking, we cache the previous
                                                   >> 1755  * search location in `curr_swap_extent', and start new searches from there.
                                                   >> 1756  * This is extremely effective.  The average number of iterations in
                                                   >> 1757  * map_swap_page() has been measured at about 0.3 per page.  - akpm.
2586  */                                              1758  */
2587 static int setup_swap_extents(struct swap_inf    1759 static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
2588 {                                                1760 {
2589         struct file *swap_file = sis->swap_fi    1761         struct file *swap_file = sis->swap_file;
2590         struct address_space *mapping = swap_    1762         struct address_space *mapping = swap_file->f_mapping;
2591         struct inode *inode = mapping->host;     1763         struct inode *inode = mapping->host;
2592         int ret;                                 1764         int ret;
2593                                                  1765 
2594         if (S_ISBLK(inode->i_mode)) {            1766         if (S_ISBLK(inode->i_mode)) {
2595                 ret = add_swap_extent(sis, 0,    1767                 ret = add_swap_extent(sis, 0, sis->max, 0);
2596                 *span = sis->pages;              1768                 *span = sis->pages;
2597                 return ret;                      1769                 return ret;
2598         }                                        1770         }
2599                                                  1771 
2600         if (mapping->a_ops->swap_activate) {     1772         if (mapping->a_ops->swap_activate) {
2601                 ret = mapping->a_ops->swap_ac    1773                 ret = mapping->a_ops->swap_activate(sis, swap_file, span);
2602                 if (ret < 0)                  !! 1774                 if (!ret) {
2603                         return ret;           !! 1775                         sis->flags |= SWP_FILE;
2604                 sis->flags |= SWP_ACTIVATED;  !! 1776                         ret = add_swap_extent(sis, 0, sis->max, 0);
2605                 if ((sis->flags & SWP_FS_OPS) && !! 1777                         *span = sis->pages;
2606                     sio_pool_init() != 0) {   << 
2607                         destroy_swap_extents(sis); << 
2608                         return -ENOMEM;       << 
2609                 }                                1778                 }
2610                 return ret;                      1779                 return ret;
2611         }                                        1780         }
2612                                                  1781 
2613         return generic_swapfile_activate(sis,    1782         return generic_swapfile_activate(sis, swap_file, span);
2614 }                                                1783 }
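
/*
 * For the S_ISBLK case above, the single extent {0, sis->max, 0} makes the
 * page-to-block mapping the identity, so the device sector is just the
 * page offset scaled from PAGE_SIZE to 512-byte units.  A hypothetical
 * helper, assuming 4 KiB pages:
 */
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumption: 4 KiB pages */

static unsigned long long swap_sector(unsigned long long page_offset)
{
	/* one identity extent: block == page_offset; 9 = log2(512) */
	return page_offset << (PAGE_SHIFT - 9);
}

int main(void)
{
	printf("page 3 of a partition swapdev -> sector %llu\n", swap_sector(3)); /* 24 */
	return 0;
}
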
2615                                                  1784 
2616 static int swap_node(struct swap_info_struct *si) !! 1785 static void _enable_swap_info(struct swap_info_struct *p, int prio,
2617 {                                             !! 1786                                 unsigned char *swap_map,
2618         struct block_device *bdev;            !! 1787                                 struct swap_cluster_info *cluster_info)
2619                                               << 
2620         if (si->bdev)                         << 
2621                 bdev = si->bdev;              << 
2622         else                                  << 
2623                 bdev = si->swap_file->f_inode->i_sb->s_bdev; << 
2624                                               << 
2625         return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE; << 
2626 }                                             << 
2627                                               << 
2628 static void setup_swap_info(struct swap_info_struct *si, int prio, << 
2629                             unsigned char *swap_map, << 
2630                             struct swap_cluster_info *cluster_info, << 
2631                             unsigned long *zeromap) << 
2632 {                                                1788 {
2633         int i;                                << 
2634                                               << 
2635         if (prio >= 0)                           1789         if (prio >= 0)
2636                 si->prio = prio;              !! 1790                 p->prio = prio;
2637         else                                     1791         else
2638                 si->prio = --least_priority;  !! 1792                 p->prio = --least_priority;
2639         /*                                       1793         /*
2640          * the plist prio is negated because     1794          * the plist prio is negated because plist ordering is
2641          * low-to-high, while swap ordering i    1795          * low-to-high, while swap ordering is high-to-low
2642          */                                      1796          */
2643         si->list.prio = -si->prio;            !! 1797         p->list.prio = -p->prio;
2644         for_each_node(i) {                    !! 1798         p->avail_list.prio = -p->prio;
2645                 if (si->prio >= 0)            !! 1799         p->swap_map = swap_map;
2646                         si->avail_lists[i].prio = -si->prio; !! 1800         p->cluster_info = cluster_info;
2647                 else {                        !! 1801         p->flags |= SWP_WRITEOK;
2648                         if (swap_node(si) == i) !! 1802         atomic_long_add(p->pages, &nr_swap_pages);
2649                                 si->avail_lists[i].prio = 1; !! 1803         total_swap_pages += p->pages;
2650                         else                  << 
2651                                 si->avail_lists[i].prio = -si->prio; << 
2652                 }                             << 
2653         }                                     << 
2654         si->swap_map = swap_map;              << 
2655         si->cluster_info = cluster_info;      << 
2656         si->zeromap = zeromap;                << 
2657 }                                             << 
2658                                               << 
2659 static void _enable_swap_info(struct swap_info_struct *si) << 
2660 {                                             << 
2661         si->flags |= SWP_WRITEOK;             << 
2662         atomic_long_add(si->pages, &nr_swap_pages); << 
2663         total_swap_pages += si->pages;        << 
2664                                                  1804 
2665         assert_spin_locked(&swap_lock);          1805         assert_spin_locked(&swap_lock);
2666         /*                                       1806         /*
2667          * both lists are plists, and thus pr    1807          * both lists are plists, and thus priority ordered.
2668          * swap_active_head needs to be prior    1808          * swap_active_head needs to be priority ordered for swapoff(),
2669          * which on removal of any swap_info_    1809          * which on removal of any swap_info_struct with an auto-assigned
2670          * (i.e. negative) priority increment    1810          * (i.e. negative) priority increments the auto-assigned priority
2671          * of any lower-priority swap_info_st    1811          * of any lower-priority swap_info_structs.
2672          * swap_avail_head needs to be priority ordered for folio_alloc_swap(), !! 1812  * swap_avail_head needs to be priority ordered for get_swap_page(),
2673          * which allocates swap pages from th    1813          * which allocates swap pages from the highest available priority
2674          * swap_info_struct.                     1814          * swap_info_struct.
2675          */                                      1815          */
2676         plist_add(&si->list, &swap_active_head); !! 1816         plist_add(&p->list, &swap_active_head);
2677                                               !! 1817         spin_lock(&swap_avail_lock);
2678         /* add to available list iff swap device is not full */ !! 1818         plist_add(&p->avail_list, &swap_avail_head);
2679         if (si->highest_bit)                  !! 1819         spin_unlock(&swap_avail_lock);
2680                 add_to_avail_list(si);        << 
2681 }                                                1820 }
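
/*
 * Why the priorities are negated above: a plist keeps the *lowest* key
 * first, while swap wants the *highest* priority device first, so storing
 * -prio gives high-priority entries the smallest keys.  Minimal userspace
 * sketch with qsort standing in for plist ordering; device names are
 * placeholders.
 */
#include <stdio.h>
#include <stdlib.h>

struct dev { const char *name; int prio; int key; };

static int by_key(const void *a, const void *b)
{
	return ((const struct dev *)a)->key - ((const struct dev *)b)->key;
}

int main(void)
{
	struct dev v[] = {
		{ "zram0", 100, 0 },
		{ "sda2",   -2, 0 },	/* auto-assigned (negative) priority */
		{ "file0",  10, 0 },
	};

	for (int i = 0; i < 3; i++)
		v[i].key = -v[i].prio;	/* the negation done in setup_swap_info() */
	qsort(v, 3, sizeof(v[0]), by_key);
	for (int i = 0; i < 3; i++)
		printf("%s (prio %d)\n", v[i].name, v[i].prio); /* zram0, file0, sda2 */
	return 0;
}
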
2682                                                  1821 
2683 static void enable_swap_info(struct swap_info_struct *si, int prio, !! 1822 static void enable_swap_info(struct swap_info_struct *p, int prio,
2684                                 unsigned char *swap_map,    1823                                 unsigned char *swap_map,
2685                                 struct swap_cluster_info *cluster_info,    1824                                 struct swap_cluster_info *cluster_info,
2686                                 unsigned long *zeromap) !! 1825                                 unsigned long *frontswap_map)
2687 {                                                1826 {
                                                   >> 1827         frontswap_init(p->type, frontswap_map);
2688         spin_lock(&swap_lock);                   1828         spin_lock(&swap_lock);
2689         spin_lock(&si->lock);                 !! 1829         spin_lock(&p->lock);
2690         setup_swap_info(si, prio, swap_map, cluster_info, zeromap); !! 1830          _enable_swap_info(p, prio, swap_map, cluster_info);
2691         spin_unlock(&si->lock);               !! 1831         spin_unlock(&p->lock);
2692         spin_unlock(&swap_lock);              << 
2693         /*                                    << 
2694          * Finished initializing swap device, now it's safe to reference it. << 
2695          */                                   << 
2696         percpu_ref_resurrect(&si->users);     << 
2697         spin_lock(&swap_lock);                << 
2698         spin_lock(&si->lock);                 << 
2699         _enable_swap_info(si);                << 
2700         spin_unlock(&si->lock);               << 
2701         spin_unlock(&swap_lock);              << 
2702 }                                             << 
2703                                               << 
2704 static void reinsert_swap_info(struct swap_info_struct *si) << 
2705 {                                             << 
2706         spin_lock(&swap_lock);                << 
2707         spin_lock(&si->lock);                 << 
2708         setup_swap_info(si, si->prio, si->swap_map, si->cluster_info, si->zeromap); << 
2709         _enable_swap_info(si);                << 
2710         spin_unlock(&si->lock);               << 
2711         spin_unlock(&swap_lock);                 1832         spin_unlock(&swap_lock);
2712 }                                                1833 }
2713                                                  1834 
2714 static bool __has_usable_swap(void)           !! 1835 static void reinsert_swap_info(struct swap_info_struct *p)
2715 {                                             << 
2716         return !plist_head_empty(&swap_active_head); << 
2717 }                                             << 
2718                                               << 
2719 bool has_usable_swap(void)                    << 
2720 {                                                1836 {
2721         bool ret;                             << 
2722                                               << 
2723         spin_lock(&swap_lock);                   1837         spin_lock(&swap_lock);
2724         ret = __has_usable_swap();            !! 1838         spin_lock(&p->lock);
                                                   >> 1839         _enable_swap_info(p, p->prio, p->swap_map, p->cluster_info);
                                                   >> 1840         spin_unlock(&p->lock);
2725         spin_unlock(&swap_lock);                 1841         spin_unlock(&swap_lock);
2726         return ret;                           << 
2727 }                                                1842 }
2728                                                  1843 
2729 SYSCALL_DEFINE1(swapoff, const char __user *,    1844 SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
2730 {                                                1845 {
2731         struct swap_info_struct *p = NULL;       1846         struct swap_info_struct *p = NULL;
2732         unsigned char *swap_map;                 1847         unsigned char *swap_map;
2733         unsigned long *zeromap;               << 
2734         struct swap_cluster_info *cluster_inf    1848         struct swap_cluster_info *cluster_info;
                                                   >> 1849         unsigned long *frontswap_map;
2735         struct file *swap_file, *victim;         1850         struct file *swap_file, *victim;
2736         struct address_space *mapping;           1851         struct address_space *mapping;
2737         struct inode *inode;                     1852         struct inode *inode;
2738         struct filename *pathname;               1853         struct filename *pathname;
2739         int err, found = 0;                      1854         int err, found = 0;
                                                   >> 1855         unsigned int old_block_size;
2740                                                  1856 
2741         if (!capable(CAP_SYS_ADMIN))             1857         if (!capable(CAP_SYS_ADMIN))
2742                 return -EPERM;                   1858                 return -EPERM;
2743                                                  1859 
2744         BUG_ON(!current->mm);                    1860         BUG_ON(!current->mm);
2745                                                  1861 
2746         pathname = getname(specialfile);         1862         pathname = getname(specialfile);
2747         if (IS_ERR(pathname))                    1863         if (IS_ERR(pathname))
2748                 return PTR_ERR(pathname);        1864                 return PTR_ERR(pathname);
2749                                                  1865 
2750         victim = file_open_name(pathname, O_R    1866         victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0);
2751         err = PTR_ERR(victim);                   1867         err = PTR_ERR(victim);
2752         if (IS_ERR(victim))                      1868         if (IS_ERR(victim))
2753                 goto out;                        1869                 goto out;
2754                                                  1870 
2755         mapping = victim->f_mapping;             1871         mapping = victim->f_mapping;
2756         spin_lock(&swap_lock);                   1872         spin_lock(&swap_lock);
2757         plist_for_each_entry(p, &swap_active_    1873         plist_for_each_entry(p, &swap_active_head, list) {
2758                 if (p->flags & SWP_WRITEOK) {    1874                 if (p->flags & SWP_WRITEOK) {
2759                         if (p->swap_file->f_m    1875                         if (p->swap_file->f_mapping == mapping) {
2760                                 found = 1;       1876                                 found = 1;
2761                                 break;           1877                                 break;
2762                         }                        1878                         }
2763                 }                                1879                 }
2764         }                                        1880         }
2765         if (!found) {                            1881         if (!found) {
2766                 err = -EINVAL;                   1882                 err = -EINVAL;
2767                 spin_unlock(&swap_lock);         1883                 spin_unlock(&swap_lock);
2768                 goto out_dput;                   1884                 goto out_dput;
2769         }                                        1885         }
2770         if (!security_vm_enough_memory_mm(cur    1886         if (!security_vm_enough_memory_mm(current->mm, p->pages))
2771                 vm_unacct_memory(p->pages);      1887                 vm_unacct_memory(p->pages);
2772         else {                                   1888         else {
2773                 err = -ENOMEM;                   1889                 err = -ENOMEM;
2774                 spin_unlock(&swap_lock);         1890                 spin_unlock(&swap_lock);
2775                 goto out_dput;                   1891                 goto out_dput;
2776         }                                        1892         }
                                                   >> 1893         spin_lock(&swap_avail_lock);
                                                   >> 1894         plist_del(&p->avail_list, &swap_avail_head);
                                                   >> 1895         spin_unlock(&swap_avail_lock);
2777         spin_lock(&p->lock);                     1896         spin_lock(&p->lock);
2778         del_from_avail_list(p);               << 
2779         if (p->prio < 0) {                       1897         if (p->prio < 0) {
2780                 struct swap_info_struct *si =    1898                 struct swap_info_struct *si = p;
2781                 int nid;                      << 
2782                                                  1899 
2783                 plist_for_each_entry_continue    1900                 plist_for_each_entry_continue(si, &swap_active_head, list) {
2784                         si->prio++;              1901                         si->prio++;
2785                         si->list.prio--;         1902                         si->list.prio--;
2786                         for_each_node(nid) {  !! 1903                         si->avail_list.prio--;
2787                                 if (si->avail_lists[nid].prio != 1) << 
2788                                         si->avail_lists[nid].prio--; << 
2789                         }                     << 
2790                 }                                1904                 }
2791                 least_priority++;                1905                 least_priority++;
2792         }                                        1906         }
2793         plist_del(&p->list, &swap_active_head    1907         plist_del(&p->list, &swap_active_head);
2794         atomic_long_sub(p->pages, &nr_swap_pa    1908         atomic_long_sub(p->pages, &nr_swap_pages);
2795         total_swap_pages -= p->pages;            1909         total_swap_pages -= p->pages;
2796         p->flags &= ~SWP_WRITEOK;                1910         p->flags &= ~SWP_WRITEOK;
2797         spin_unlock(&p->lock);                   1911         spin_unlock(&p->lock);
2798         spin_unlock(&swap_lock);                 1912         spin_unlock(&swap_lock);
2799                                                  1913 
2800         disable_swap_slots_cache_lock();      << 
2801                                               << 
2802         set_current_oom_origin();                1914         set_current_oom_origin();
2803         err = try_to_unuse(p->type);          !! 1915         err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
2804         clear_current_oom_origin();              1916         clear_current_oom_origin();
2805                                                  1917 
2806         if (err) {                               1918         if (err) {
2807                 /* re-insert swap space back     1919                 /* re-insert swap space back into swap_list */
2808                 reinsert_swap_info(p);           1920                 reinsert_swap_info(p);
2809                 reenable_swap_slots_cache_unlock(); << 
2810                 goto out_dput;                   1921                 goto out_dput;
2811         }                                        1922         }
2812                                                  1923 
2813         reenable_swap_slots_cache_unlock();   << 
2814                                               << 
2815         /*                                    << 
2816          * Wait for swap operations protected by get/put_swap_device() << 
2817          * to complete.  Because of synchronize_rcu() here, all swap << 
2818          * operations protected by RCU reader side lock (including any << 
2819          * spinlock) will be waited too.  This makes it easy to << 
2820          * prevent folio_test_swapcache() and the following swap cache << 
2821          * operations from racing with swapoff. << 
2822          */                                   << 
2823         percpu_ref_kill(&p->users);           << 
2824         synchronize_rcu();                    << 
2825         wait_for_completion(&p->comp);        << 
2826                                               << 
2827         flush_work(&p->discard_work);            1924         flush_work(&p->discard_work);
2828         flush_work(&p->reclaim_work);         << 
2829                                                  1925 
2830         destroy_swap_extents(p);                 1926         destroy_swap_extents(p);
2831         if (p->flags & SWP_CONTINUED)            1927         if (p->flags & SWP_CONTINUED)
2832                 free_swap_count_continuations    1928                 free_swap_count_continuations(p);
2833                                                  1929 
2834         if (!p->bdev || !bdev_nonrot(p->bdev)) << 
2835                 atomic_dec(&nr_rotate_swap);  << 
2836                                               << 
2837         mutex_lock(&swapon_mutex);               1930         mutex_lock(&swapon_mutex);
2838         spin_lock(&swap_lock);                   1931         spin_lock(&swap_lock);
2839         spin_lock(&p->lock);                     1932         spin_lock(&p->lock);
2840         drain_mmlist();                          1933         drain_mmlist();
2841                                                  1934 
2842         /* wait for anyone still in scan_swap_map_slots */ !! 1935         /* wait for anyone still in scan_swap_map */
2843         p->highest_bit = 0;             /* cu    1936         p->highest_bit = 0;             /* cuts scans short */
2844         while (p->flags >= SWP_SCANNING) {       1937         while (p->flags >= SWP_SCANNING) {
2845                 spin_unlock(&p->lock);           1938                 spin_unlock(&p->lock);
2846                 spin_unlock(&swap_lock);         1939                 spin_unlock(&swap_lock);
2847                 schedule_timeout_uninterrupti    1940                 schedule_timeout_uninterruptible(1);
2848                 spin_lock(&swap_lock);           1941                 spin_lock(&swap_lock);
2849                 spin_lock(&p->lock);             1942                 spin_lock(&p->lock);
2850         }                                        1943         }
2851                                                  1944 
2852         swap_file = p->swap_file;                1945         swap_file = p->swap_file;
                                                   >> 1946         old_block_size = p->old_block_size;
2853         p->swap_file = NULL;                     1947         p->swap_file = NULL;
2854         p->max = 0;                              1948         p->max = 0;
2855         swap_map = p->swap_map;                  1949         swap_map = p->swap_map;
2856         p->swap_map = NULL;                      1950         p->swap_map = NULL;
2857         zeromap = p->zeromap;                 << 
2858         p->zeromap = NULL;                    << 
2859         cluster_info = p->cluster_info;          1951         cluster_info = p->cluster_info;
2860         p->cluster_info = NULL;                  1952         p->cluster_info = NULL;
                                                   >> 1953         frontswap_map = frontswap_map_get(p);
2861         spin_unlock(&p->lock);                   1954         spin_unlock(&p->lock);
2862         spin_unlock(&swap_lock);                 1955         spin_unlock(&swap_lock);
2863         arch_swap_invalidate_area(p->type);   !! 1956         frontswap_invalidate_area(p->type);
2864         zswap_swapoff(p->type);               !! 1957         frontswap_map_set(p, NULL);
2865         mutex_unlock(&swapon_mutex);             1958         mutex_unlock(&swapon_mutex);
2866         free_percpu(p->percpu_cluster);          1959         free_percpu(p->percpu_cluster);
2867         p->percpu_cluster = NULL;                1960         p->percpu_cluster = NULL;
2868         free_percpu(p->cluster_next_cpu);     << 
2869         p->cluster_next_cpu = NULL;           << 
2870         vfree(swap_map);                         1961         vfree(swap_map);
2871         kvfree(zeromap);                      !! 1962         vfree(cluster_info);
2872         kvfree(cluster_info);                 !! 1963         vfree(frontswap_map);
2873         /* Destroy swap account information *    1964         /* Destroy swap account information */
2874         swap_cgroup_swapoff(p->type);            1965         swap_cgroup_swapoff(p->type);
2875         exit_swap_address_space(p->type);     << 
2876                                                  1966 
2877         inode = mapping->host;                   1967         inode = mapping->host;
2878                                               !! 1968         if (S_ISBLK(inode->i_mode)) {
2879         inode_lock(inode);                    !! 1969                 struct block_device *bdev = I_BDEV(inode);
2880         inode->i_flags &= ~S_SWAPFILE;        !! 1970                 set_blocksize(bdev, old_block_size);
2881         inode_unlock(inode);                  !! 1971                 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
                                                   >> 1972         } else {
                                                   >> 1973                 mutex_lock(&inode->i_mutex);
                                                   >> 1974                 inode->i_flags &= ~S_SWAPFILE;
                                                   >> 1975                 mutex_unlock(&inode->i_mutex);
                                                   >> 1976         }
2882         filp_close(swap_file, NULL);             1977         filp_close(swap_file, NULL);
2883                                                  1978 
2884         /*                                       1979         /*
2885          * Clear the SWP_USED flag after all     1980          * Clear the SWP_USED flag after all resources are freed so that swapon
2886          * can reuse this swap_info in alloc_    1981          * can reuse this swap_info in alloc_swap_info() safely.  It is ok to
2887          * not hold p->lock after we cleared     1982          * not hold p->lock after we cleared its SWP_WRITEOK.
2888          */                                      1983          */
2889         spin_lock(&swap_lock);                   1984         spin_lock(&swap_lock);
2890         p->flags = 0;                            1985         p->flags = 0;
2891         spin_unlock(&swap_lock);                 1986         spin_unlock(&swap_lock);
2892                                                  1987 
2893         err = 0;                                 1988         err = 0;
2894         atomic_inc(&proc_poll_event);            1989         atomic_inc(&proc_poll_event);
2895         wake_up_interruptible(&proc_poll_wait    1990         wake_up_interruptible(&proc_poll_wait);
2896                                                  1991 
2897 out_dput:                                        1992 out_dput:
2898         filp_close(victim, NULL);                1993         filp_close(victim, NULL);
2899 out:                                             1994 out:
2900         putname(pathname);                       1995         putname(pathname);
2901         return err;                              1996         return err;
2902 }                                                1997 }
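
/*
 * Userspace view of the syscall above: glibc exposes it as swapoff(2).
 * It needs CAP_SYS_ADMIN, and fails with EINVAL if the path is not an
 * active swap area - the -EINVAL path taken when the plist search above
 * finds nothing.  /dev/sdb2 is a placeholder device.
 */
#include <stdio.h>
#include <sys/swap.h>

int main(void)
{
	if (swapoff("/dev/sdb2") != 0)
		perror("swapoff");	/* EPERM without CAP_SYS_ADMIN, EINVAL if not swap */
	return 0;
}
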
2903                                                  1998 
2904 #ifdef CONFIG_PROC_FS                            1999 #ifdef CONFIG_PROC_FS
2905 static __poll_t swaps_poll(struct file *file, poll_table *wait) !! 2000 static unsigned swaps_poll(struct file *file, poll_table *wait)
2906 {                                                2001 {
2907         struct seq_file *seq = file->private_    2002         struct seq_file *seq = file->private_data;
2908                                                  2003 
2909         poll_wait(file, &proc_poll_wait, wait    2004         poll_wait(file, &proc_poll_wait, wait);
2910                                                  2005 
2911         if (seq->poll_event != atomic_read(&p    2006         if (seq->poll_event != atomic_read(&proc_poll_event)) {
2912                 seq->poll_event = atomic_read    2007                 seq->poll_event = atomic_read(&proc_poll_event);
2913                 return EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI; !! 2008                 return POLLIN | POLLRDNORM | POLLERR | POLLPRI;
2914         }                                        2009         }
2915                                                  2010 
2916         return EPOLLIN | EPOLLRDNORM;         !! 2011         return POLLIN | POLLRDNORM;
2917 }                                                2012 }
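
/*
 * What swaps_poll() above enables from userspace: poll(2) on /proc/swaps
 * wakes with POLLERR | POLLPRI once a swapon/swapoff bumps
 * proc_poll_event, telling monitors to re-read the file.  A minimal
 * sketch:
 */
#include <stdio.h>
#include <poll.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	struct pollfd pfd;

	pfd.fd = open("/proc/swaps", O_RDONLY);
	if (pfd.fd < 0) {
		perror("open");
		return 1;
	}
	pfd.events = POLLERR | POLLPRI;
	if (poll(&pfd, 1, -1) == 1)	/* blocks until the swap config changes */
		printf("swap configuration changed, re-read /proc/swaps\n");
	close(pfd.fd);
	return 0;
}
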
2918                                                  2013 
2919 /* iterator */                                   2014 /* iterator */
2920 static void *swap_start(struct seq_file *swap    2015 static void *swap_start(struct seq_file *swap, loff_t *pos)
2921 {                                                2016 {
2922         struct swap_info_struct *si;             2017         struct swap_info_struct *si;
2923         int type;                                2018         int type;
2924         loff_t l = *pos;                         2019         loff_t l = *pos;
2925                                                  2020 
2926         mutex_lock(&swapon_mutex);               2021         mutex_lock(&swapon_mutex);
2927                                                  2022 
2928         if (!l)                                  2023         if (!l)
2929                 return SEQ_START_TOKEN;          2024                 return SEQ_START_TOKEN;
2930                                                  2025 
2931         for (type = 0; (si = swap_type_to_swap_info(type)); type++) { !! 2026         for (type = 0; type < nr_swapfiles; type++) {
                                                   >> 2027                 smp_rmb();      /* read nr_swapfiles before swap_info[type] */
                                                   >> 2028                 si = swap_info[type];
2932                 if (!(si->flags & SWP_USED) |    2029                 if (!(si->flags & SWP_USED) || !si->swap_map)
2933                         continue;                2030                         continue;
2934                 if (!--l)                        2031                 if (!--l)
2935                         return si;               2032                         return si;
2936         }                                        2033         }
2937                                                  2034 
2938         return NULL;                             2035         return NULL;
2939 }                                                2036 }
2940                                                  2037 
2941 static void *swap_next(struct seq_file *swap,    2038 static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
2942 {                                                2039 {
2943         struct swap_info_struct *si = v;         2040         struct swap_info_struct *si = v;
2944         int type;                                2041         int type;
2945                                                  2042 
2946         if (v == SEQ_START_TOKEN)                2043         if (v == SEQ_START_TOKEN)
2947                 type = 0;                        2044                 type = 0;
2948         else                                     2045         else
2949                 type = si->type + 1;             2046                 type = si->type + 1;
2950                                                  2047 
2951         ++(*pos);                             !! 2048         for (; type < nr_swapfiles; type++) {
2952         for (; (si = swap_type_to_swap_info(type)); type++) { !! 2049                 smp_rmb();      /* read nr_swapfiles before swap_info[type] */
                                                   >> 2050                 si = swap_info[type];
2953                 if (!(si->flags & SWP_USED) |    2051                 if (!(si->flags & SWP_USED) || !si->swap_map)
2954                         continue;                2052                         continue;
                                                   >> 2053                 ++*pos;
2955                 return si;                       2054                 return si;
2956         }                                        2055         }
2957                                                  2056 
2958         return NULL;                             2057         return NULL;
2959 }                                                2058 }
2960                                                  2059 
2961 static void swap_stop(struct seq_file *swap,     2060 static void swap_stop(struct seq_file *swap, void *v)
2962 {                                                2061 {
2963         mutex_unlock(&swapon_mutex);             2062         mutex_unlock(&swapon_mutex);
2964 }                                                2063 }
2965                                                  2064 
2966 static int swap_show(struct seq_file *swap, v    2065 static int swap_show(struct seq_file *swap, void *v)
2967 {                                                2066 {
2968         struct swap_info_struct *si = v;         2067         struct swap_info_struct *si = v;
2969         struct file *file;                       2068         struct file *file;
2970         int len;                                 2069         int len;
2971         unsigned long bytes, inuse;           << 
2972                                                  2070 
2973         if (si == SEQ_START_TOKEN) {             2071         if (si == SEQ_START_TOKEN) {
2974                 seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n"); !! 2072                 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
2975                 return 0;                        2073                 return 0;
2976         }                                        2074         }
2977                                                  2075 
2978         bytes = K(si->pages);                 << 
2979         inuse = K(READ_ONCE(si->inuse_pages)); << 
2980                                               << 
2981         file = si->swap_file;                    2076         file = si->swap_file;
2982         len = seq_file_path(swap, file, " \t\    2077         len = seq_file_path(swap, file, " \t\n\\");
2983         seq_printf(swap, "%*s%s\t%lu\t%s%lu\t%s%d\n", !! 2078         seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
2984                         len < 40 ? 40 - len :    2079                         len < 40 ? 40 - len : 1, " ",
2985                         S_ISBLK(file_inode(fi    2080                         S_ISBLK(file_inode(file)->i_mode) ?
2986                                 "partition" :    2081                                 "partition" : "file\t",
2987                         bytes, bytes < 10000000 ? "\t" : "", !! 2082                         si->pages << (PAGE_SHIFT - 10),
2988                         inuse, inuse < 10000000 ? "\t" : "", !! 2083                         si->inuse_pages << (PAGE_SHIFT - 10),
2989                         si->prio);               2084                         si->prio);
2990         return 0;                                2085         return 0;
2991 }                                                2086 }
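
/*
 * Reading back what swap_show() above prints: a tolerant /proc/swaps
 * parser.  Skip the header line, then each row is path, type, size, used,
 * priority, with sizes in KiB (the K() scaling of si->pages above).
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/swaps", "r");
	char header[256], path[256], type[32];
	unsigned long size, used;
	int prio;

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (!fgets(header, sizeof(header), f)) {	/* discard column headers */
		fclose(f);
		return 1;
	}
	while (fscanf(f, "%255s %31s %lu %lu %d",
		      path, type, &size, &used, &prio) == 5)
		printf("%s: %lu/%lu KiB used, prio %d\n", path, used, size, prio);
	fclose(f);
	return 0;
}
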
2992                                                  2087 
2993 static const struct seq_operations swaps_op =    2088 static const struct seq_operations swaps_op = {
2994         .start =        swap_start,              2089         .start =        swap_start,
2995         .next =         swap_next,               2090         .next =         swap_next,
2996         .stop =         swap_stop,               2091         .stop =         swap_stop,
2997         .show =         swap_show                2092         .show =         swap_show
2998 };                                               2093 };
2999                                                  2094 
3000 static int swaps_open(struct inode *inode, st    2095 static int swaps_open(struct inode *inode, struct file *file)
3001 {                                                2096 {
3002         struct seq_file *seq;                    2097         struct seq_file *seq;
3003         int ret;                                 2098         int ret;
3004                                                  2099 
3005         ret = seq_open(file, &swaps_op);         2100         ret = seq_open(file, &swaps_op);
3006         if (ret)                                 2101         if (ret)
3007                 return ret;                      2102                 return ret;
3008                                                  2103 
3009         seq = file->private_data;                2104         seq = file->private_data;
3010         seq->poll_event = atomic_read(&proc_p    2105         seq->poll_event = atomic_read(&proc_poll_event);
3011         return 0;                                2106         return 0;
3012 }                                                2107 }
3013                                                  2108 
3014 static const struct proc_ops swaps_proc_ops = { !! 2109 static const struct file_operations proc_swaps_operations = {
3015         .proc_flags     = PROC_ENTRY_PERMANENT, !! 2110         .open           = swaps_open,
3016         .proc_open      = swaps_open,         !! 2111         .read           = seq_read,
3017         .proc_read      = seq_read,           !! 2112         .llseek         = seq_lseek,
3018         .proc_lseek     = seq_lseek,          !! 2113         .release        = seq_release,
3019         .proc_release   = seq_release,        !! 2114         .poll           = swaps_poll,
3020         .proc_poll      = swaps_poll,         << 
3021 };                                               2115 };
3022                                                  2116 
3023 static int __init procswaps_init(void)           2117 static int __init procswaps_init(void)
3024 {                                                2118 {
3025         proc_create("swaps", 0, NULL, &swaps_proc_ops); !! 2119         proc_create("swaps", 0, NULL, &proc_swaps_operations);
3026         return 0;                                2120         return 0;
3027 }                                                2121 }
3028 __initcall(procswaps_init);                      2122 __initcall(procswaps_init);
3029 #endif /* CONFIG_PROC_FS */                      2123 #endif /* CONFIG_PROC_FS */
3030                                                  2124 
3031 #ifdef MAX_SWAPFILES_CHECK                       2125 #ifdef MAX_SWAPFILES_CHECK
3032 static int __init max_swapfiles_check(void)      2126 static int __init max_swapfiles_check(void)
3033 {                                                2127 {
3034         MAX_SWAPFILES_CHECK();                   2128         MAX_SWAPFILES_CHECK();
3035         return 0;                                2129         return 0;
3036 }                                                2130 }
3037 late_initcall(max_swapfiles_check);              2131 late_initcall(max_swapfiles_check);
3038 #endif                                           2132 #endif
3039                                                  2133 
3040 static struct swap_info_struct *alloc_swap_in    2134 static struct swap_info_struct *alloc_swap_info(void)
3041 {                                                2135 {
3042         struct swap_info_struct *p;              2136         struct swap_info_struct *p;
3043         struct swap_info_struct *defer = NULL; << 
3044         unsigned int type;                       2137         unsigned int type;
3045         int i;                                << 
3046                                                  2138 
3047         p = kvzalloc(struct_size(p, avail_lists, nr_node_ids), GFP_KERNEL); !! 2139         p = kzalloc(sizeof(*p), GFP_KERNEL);
3048         if (!p)                                  2140         if (!p)
3049                 return ERR_PTR(-ENOMEM);         2141                 return ERR_PTR(-ENOMEM);
3050                                                  2142 
3051         if (percpu_ref_init(&p->users, swap_users_ref_free, << 
3052                             PERCPU_REF_INIT_DEAD, GFP_KERNEL)) { << 
3053                 kvfree(p);                    << 
3054                 return ERR_PTR(-ENOMEM);      << 
3055         }                                     << 
3056                                               << 
3057         spin_lock(&swap_lock);                   2143         spin_lock(&swap_lock);
3058         for (type = 0; type < nr_swapfiles; t    2144         for (type = 0; type < nr_swapfiles; type++) {
3059                 if (!(swap_info[type]->flags     2145                 if (!(swap_info[type]->flags & SWP_USED))
3060                         break;                   2146                         break;
3061         }                                        2147         }
3062         if (type >= MAX_SWAPFILES) {             2148         if (type >= MAX_SWAPFILES) {
3063                 spin_unlock(&swap_lock);         2149                 spin_unlock(&swap_lock);
3064                 percpu_ref_exit(&p->users);   !! 2150                 kfree(p);
3065                 kvfree(p);                    << 
3066                 return ERR_PTR(-EPERM);          2151                 return ERR_PTR(-EPERM);
3067         }                                        2152         }
3068         if (type >= nr_swapfiles) {              2153         if (type >= nr_swapfiles) {
3069                 p->type = type;                  2154                 p->type = type;
                                                   >> 2155                 swap_info[type] = p;
3070                 /*                               2156                 /*
3071                  * Publish the swap_info_struct after initializing it. !! 2157                  * Write swap_info[type] before nr_swapfiles, in case a
3072                  * Note that kvzalloc() above zeroes all its fields. !! 2158                  * racing procfs swap_start() or swap_next() is reading them.
                                                   >> 2159                  * (We never shrink nr_swapfiles, we never free this entry.)
3073                  */                              2160                  */
3074                 smp_store_release(&swap_info[type], p); /* rcu_assign_pointer() */ !! 2161                 smp_wmb();
3075                 nr_swapfiles++;                  2162                 nr_swapfiles++;
3076         } else {                                 2163         } else {
3077                 defer = p;                    !! 2164                 kfree(p);
3078                 p = swap_info[type];             2165                 p = swap_info[type];
3079                 /*                               2166                 /*
3080                  * Do not memset this entry:     2167                  * Do not memset this entry: a racing procfs swap_next()
3081                  * would be relying on p->typ    2168                  * would be relying on p->type to remain valid.
3082                  */                              2169                  */
3083         }                                        2170         }
3084         p->swap_extent_root = RB_ROOT;        !! 2171         INIT_LIST_HEAD(&p->first_swap_extent.list);
3085         plist_node_init(&p->list, 0);            2172         plist_node_init(&p->list, 0);
3086         for_each_node(i)                      !! 2173         plist_node_init(&p->avail_list, 0);
3087                 plist_node_init(&p->avail_lists[i], 0); << 
3088         p->flags = SWP_USED;                     2174         p->flags = SWP_USED;
3089         spin_unlock(&swap_lock);                 2175         spin_unlock(&swap_lock);
3090         if (defer) {                          << 
3091                 percpu_ref_exit(&defer->users); << 
3092                 kvfree(defer);                << 
3093         }                                     << 
3094         spin_lock_init(&p->lock);                2176         spin_lock_init(&p->lock);
3095         spin_lock_init(&p->cont_lock);        << 
3096         init_completion(&p->comp);            << 
3097                                                  2177 
3098         return p;                                2178         return p;
3099 }                                                2179 }
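
/*
 * The publication pattern used above, in portable C11: initialize the
 * object fully, then store the pointer with release semantics so a reader
 * sees the initialized fields.  This mirrors the smp_store_release() of
 * swap_info[type] above; the kernel's reader side relies on dependency
 * ordering (an rcu_dereference-style READ_ONCE()), which this sketch
 * approximates with an acquire load.  Types here are illustrative.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct info { int type; int flags; };

static _Atomic(struct info *) slot;

static void publish(int type)
{
	struct info *p = calloc(1, sizeof(*p));

	p->type = type;			/* initialize first ... */
	p->flags = 1;
	atomic_store_explicit(&slot, p, memory_order_release);	/* ... then publish */
}

int main(void)
{
	publish(0);
	struct info *p = atomic_load_explicit(&slot, memory_order_acquire);
	if (p)				/* acquire guarantees initialized fields */
		printf("type %d flags %d\n", p->type, p->flags);
	return 0;
}
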
3100                                                  2180 
3101 static int claim_swapfile(struct swap_info_struct *si, struct inode *inode) !! 2181 static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
3102 {                                                2182 {
                                                   >> 2183         int error;
                                                   >> 2184 
3103         if (S_ISBLK(inode->i_mode)) {            2185         if (S_ISBLK(inode->i_mode)) {
3104                 si->bdev = I_BDEV(inode);     !! 2186                 p->bdev = bdgrab(I_BDEV(inode));
3105                 /*                            !! 2187                 error = blkdev_get(p->bdev,
3106                  * Zoned block devices contain zones that have a sequential !! 2188                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
3107                  * write only restriction.  Hence zoned block devices are not !! 2189                 if (error < 0) {
3108                  * suitable for swapping.  Disallow them here. !! 2190                         p->bdev = NULL;
3109                  */                           !! 2191                         return error;
3110                 if (bdev_is_zoned(si->bdev))  !! 2192                 }
3111                         return -EINVAL;       !! 2193                 p->old_block_size = block_size(p->bdev);
3112                 si->flags |= SWP_BLKDEV;      !! 2194                 error = set_blocksize(p->bdev, PAGE_SIZE);
                                                   >> 2195                 if (error < 0)
                                                   >> 2196                         return error;
                                                   >> 2197                 p->flags |= SWP_BLKDEV;
3113         } else if (S_ISREG(inode->i_mode)) {     2198         } else if (S_ISREG(inode->i_mode)) {
3114                 si->bdev = inode->i_sb->s_bde !! 2199                 p->bdev = inode->i_sb->s_bdev;
3115         }                                     !! 2200                 mutex_lock(&inode->i_mutex);
                                                   >> 2201                 if (IS_SWAPFILE(inode))
                                                   >> 2202                         return -EBUSY;
                                                   >> 2203         } else
                                                   >> 2204                 return -EINVAL;
3116                                                  2205 
3117         return 0;                                2206         return 0;
3118 }                                                2207 }
3119                                                  2208 
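claim_swapfile() accepts exactly two kinds of backing object, block devices and regular files; on the 4.4 side anything else falls through to return -EINVAL, while the 6.12 side performs that classification in the caller. The same dispatch can be mirrored from userspace with the standard stat(2) type macros; a small illustration (classify_swap_target is a hypothetical helper, not a kernel function):

#include <stdio.h>
#include <sys/stat.h>

/* Report how a prospective swapon(2) target would be classified. */
int classify_swap_target(const char *path)
{
        struct stat st;

        if (stat(path, &st) != 0)
                return -1;
        if (S_ISBLK(st.st_mode))
                printf("%s: whole block device\n", path);
        else if (S_ISREG(st.st_mode))
                printf("%s: regular swap file\n", path);
        else
                printf("%s: unusable for swap (kernel returns EINVAL)\n", path);
        return 0;
}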
3120                                                  2209 
3121 /*                                               2210 /*
3122  * Find out how many pages are allowed for a     2211  * Find out how many pages are allowed for a single swap device. There
3123  * are two limiting factors:                     2212  * are two limiting factors:
3124  * 1) the number of bits for the swap offset     2213  * 1) the number of bits for the swap offset in the swp_entry_t type, and
3125  * 2) the number of bits in the swap pte, as     2214  * 2) the number of bits in the swap pte, as defined by the different
3126  * architectures.                                2215  * architectures.
3127  *                                               2216  *
3128  * In order to find the largest possible bit     2217  * In order to find the largest possible bit mask, a swap entry with
3129  * swap type 0 and swap offset ~0UL is create    2218  * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
3130  * decoded to a swp_entry_t again, and finall    2219  * decoded to a swp_entry_t again, and finally the swap offset is
3131  * extracted.                                    2220  * extracted.
3132  *                                               2221  *
3133  * This will mask all the bits from the initi    2222  * This will mask all the bits from the initial ~0UL mask that can't
3134  * be encoded in either the swp_entry_t or th    2223  * be encoded in either the swp_entry_t or the architecture definition
3135  * of a swap pte.                                2224  * of a swap pte.
3136  */                                              2225  */
3137 unsigned long generic_max_swapfile_size(void)    2226 unsigned long generic_max_swapfile_size(void)
3138 {                                                2227 {
3139         return swp_offset(pte_to_swp_entry(      2228         return swp_offset(pte_to_swp_entry(
3140                         swp_entry_to_pte(swp_    2229                         swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
3141 }                                                2230 }
3142                                                  2231 
3143 /* Can be overridden by an architecture for a    2232 /* Can be overridden by an architecture for additional checks. */
3144 __weak unsigned long arch_max_swapfile_size(v !! 2233 __weak unsigned long max_swapfile_size(void)
3145 {                                                2234 {
3146         return generic_max_swapfile_size();      2235         return generic_max_swapfile_size();
3147 }                                                2236 }
3148                                                  2237 
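The comment above generic_max_swapfile_size() describes a masking round-trip: build a swap entry with offset ~0UL, encode it into a swap pte, decode it back, and whatever offset bits survive (plus one) bound the device size. The trick is easy to demonstrate with a self-contained toy in which the pte's offset field is an arbitrary 5 bits wide, a stand-in for whatever a real architecture's pte layout can hold:

#include <stdio.h>

#define TOY_OFFSET_BITS 5   /* hypothetical width of the pte's swap-offset field */

static unsigned long toy_swp_entry_to_pte(unsigned long offset)
{
        /* the narrow field silently drops every bit it cannot store */
        return offset & ((1UL << TOY_OFFSET_BITS) - 1);
}

static unsigned long toy_pte_to_swp_offset(unsigned long pte)
{
        return pte;         /* decoding is lossless for the bits that survived */
}

int main(void)
{
        unsigned long max =
                toy_pte_to_swp_offset(toy_swp_entry_to_pte(~0UL)) + 1;

        printf("max pages: %lu\n", max);        /* prints 32, i.e. 2^5 */
        return 0;
}

Every bit the encode step cannot represent is masked away, which is exactly how the initial ~0UL collapses to the largest offset the architecture can express.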
3149 static unsigned long read_swap_header(struct  !! 2238 static unsigned long read_swap_header(struct swap_info_struct *p,
3150                                         union    2239                                         union swap_header *swap_header,
3151                                         struc    2240                                         struct inode *inode)
3152 {                                                2241 {
3153         int i;                                   2242         int i;
3154         unsigned long maxpages;                  2243         unsigned long maxpages;
3155         unsigned long swapfilepages;             2244         unsigned long swapfilepages;
3156         unsigned long last_page;                 2245         unsigned long last_page;
3157                                                  2246 
3158         if (memcmp("SWAPSPACE2", swap_header-    2247         if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
3159                 pr_err("Unable to find swap-s    2248                 pr_err("Unable to find swap-space signature\n");
3160                 return 0;                        2249                 return 0;
3161         }                                        2250         }
3162                                                  2251 
3163         /* swap partition endianness hack...  !! 2252         /* swap partition endianness hack... */
3164         if (swab32(swap_header->info.version)    2253         if (swab32(swap_header->info.version) == 1) {
3165                 swab32s(&swap_header->info.ve    2254                 swab32s(&swap_header->info.version);
3166                 swab32s(&swap_header->info.la    2255                 swab32s(&swap_header->info.last_page);
3167                 swab32s(&swap_header->info.nr    2256                 swab32s(&swap_header->info.nr_badpages);
3168                 if (swap_header->info.nr_badp    2257                 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
3169                         return 0;                2258                         return 0;
3170                 for (i = 0; i < swap_header->    2259                 for (i = 0; i < swap_header->info.nr_badpages; i++)
3171                         swab32s(&swap_header-    2260                         swab32s(&swap_header->info.badpages[i]);
3172         }                                        2261         }
3173         /* Check the swap header's sub-versio    2262         /* Check the swap header's sub-version */
3174         if (swap_header->info.version != 1) {    2263         if (swap_header->info.version != 1) {
3175                 pr_warn("Unable to handle swa    2264                 pr_warn("Unable to handle swap header version %d\n",
3176                         swap_header->info.ver    2265                         swap_header->info.version);
3177                 return 0;                        2266                 return 0;
3178         }                                        2267         }
3179                                                  2268 
3180         si->lowest_bit  = 1;                  !! 2269         p->lowest_bit  = 1;
3181         si->cluster_next = 1;                 !! 2270         p->cluster_next = 1;
3182         si->cluster_nr = 0;                   !! 2271         p->cluster_nr = 0;
3183                                                  2272 
3184         maxpages = swapfile_maximum_size;     !! 2273         maxpages = max_swapfile_size();
3185         last_page = swap_header->info.last_pa    2274         last_page = swap_header->info.last_page;
3186         if (!last_page) {                        2275         if (!last_page) {
3187                 pr_warn("Empty swap-file\n");    2276                 pr_warn("Empty swap-file\n");
3188                 return 0;                        2277                 return 0;
3189         }                                        2278         }
3190         if (last_page > maxpages) {              2279         if (last_page > maxpages) {
3191                 pr_warn("Truncating oversized    2280                 pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
3192                         K(maxpages), K(last_p !! 2281                         maxpages << (PAGE_SHIFT - 10),
                                                   >> 2282                         last_page << (PAGE_SHIFT - 10));
3193         }                                        2283         }
3194         if (maxpages > last_page) {              2284         if (maxpages > last_page) {
3195                 maxpages = last_page + 1;        2285                 maxpages = last_page + 1;
3196                 /* p->max is an unsigned int:    2286                 /* p->max is an unsigned int: don't overflow it */
3197                 if ((unsigned int)maxpages ==    2287                 if ((unsigned int)maxpages == 0)
3198                         maxpages = UINT_MAX;     2288                         maxpages = UINT_MAX;
3199         }                                        2289         }
3200         si->highest_bit = maxpages - 1;       !! 2290         p->highest_bit = maxpages - 1;
3201                                                  2291 
3202         if (!maxpages)                           2292         if (!maxpages)
3203                 return 0;                        2293                 return 0;
3204         swapfilepages = i_size_read(inode) >>    2294         swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
3205         if (swapfilepages && maxpages > swapf    2295         if (swapfilepages && maxpages > swapfilepages) {
3206                 pr_warn("Swap area shorter th    2296                 pr_warn("Swap area shorter than signature indicates\n");
3207                 return 0;                        2297                 return 0;
3208         }                                        2298         }
3209         if (swap_header->info.nr_badpages &&     2299         if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
3210                 return 0;                        2300                 return 0;
3211         if (swap_header->info.nr_badpages > M    2301         if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
3212                 return 0;                        2302                 return 0;
3213                                                  2303 
3214         return maxpages;                         2304         return maxpages;
3215 }                                                2305 }
3216                                                  2306 
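In both versions, read_swap_header() checks the same things in the same order: the 10-byte "SWAPSPACE2" signature at the very end of the first page, a version field that equals 1 after byte-swapping when the area was formatted on a machine of the opposite endianness, the sub-version, and a sane last_page. A hedged userspace sketch of that validation, assuming a 4096-byte page and the field offsets of the kernel's union swap_header (info.version at byte 1024, info.last_page at byte 1028):

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <byteswap.h>

#define PG 4096                         /* assumed page size */

/* Validate the first page of a swap area roughly as read_swap_header() does. */
int check_swap_header(const unsigned char page[PG])
{
        uint32_t version, last_page;

        if (memcmp("SWAPSPACE2", page + PG - 10, 10) != 0)
                return -1;              /* no swap-space signature */
        memcpy(&version, page + 1024, 4);
        memcpy(&last_page, page + 1028, 4);
        if (bswap_32(version) == 1) {   /* formatted with the other endianness */
                version = bswap_32(version);
                last_page = bswap_32(last_page);
        }
        if (version != 1)
                return -1;              /* unknown sub-version */
        if (last_page == 0)
                return -1;              /* empty swap area */
        printf("usable header, last_page=%u\n", last_page);
        return 0;
}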
3217 #define SWAP_CLUSTER_INFO_COLS                !! 2307 static int setup_swap_map_and_extents(struct swap_info_struct *p,
3218         DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(s << 
3219 #define SWAP_CLUSTER_SPACE_COLS               << 
3220         DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES << 
3221 #define SWAP_CLUSTER_COLS                     << 
3222         max_t(unsigned int, SWAP_CLUSTER_INFO << 
3223                                               << 
3224 static int setup_swap_map_and_extents(struct  << 
3225                                         union    2308                                         union swap_header *swap_header,
3226                                         unsig    2309                                         unsigned char *swap_map,
                                                   >> 2310                                         struct swap_cluster_info *cluster_info,
3227                                         unsig    2311                                         unsigned long maxpages,
3228                                         secto    2312                                         sector_t *span)
3229 {                                                2313 {
                                                   >> 2314         int i;
3230         unsigned int nr_good_pages;              2315         unsigned int nr_good_pages;
3231         unsigned long i;                      << 
3232         int nr_extents;                          2316         int nr_extents;
                                                   >> 2317         unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
                                                   >> 2318         unsigned long idx = p->cluster_next / SWAPFILE_CLUSTER;
3233                                                  2319 
3234         nr_good_pages = maxpages - 1;   /* om    2320         nr_good_pages = maxpages - 1;   /* omit header page */
3235                                                  2321 
                                                   >> 2322         cluster_set_null(&p->free_cluster_head);
                                                   >> 2323         cluster_set_null(&p->free_cluster_tail);
                                                   >> 2324         cluster_set_null(&p->discard_cluster_head);
                                                   >> 2325         cluster_set_null(&p->discard_cluster_tail);
                                                   >> 2326 
3236         for (i = 0; i < swap_header->info.nr_    2327         for (i = 0; i < swap_header->info.nr_badpages; i++) {
3237                 unsigned int page_nr = swap_h    2328                 unsigned int page_nr = swap_header->info.badpages[i];
3238                 if (page_nr == 0 || page_nr >    2329                 if (page_nr == 0 || page_nr > swap_header->info.last_page)
3239                         return -EINVAL;          2330                         return -EINVAL;
3240                 if (page_nr < maxpages) {        2331                 if (page_nr < maxpages) {
3241                         swap_map[page_nr] = S    2332                         swap_map[page_nr] = SWAP_MAP_BAD;
3242                         nr_good_pages--;         2333                         nr_good_pages--;
                                                   >> 2334                         /*
                                                   >> 2335                          * Haven't marked the cluster free yet, no list
                                                   >> 2336                          * operation involved
                                                   >> 2337                          */
                                                   >> 2338                         inc_cluster_info_page(p, cluster_info, page_nr);
3243                 }                                2339                 }
3244         }                                        2340         }
3245                                                  2341 
                                                   >> 2342         /* Haven't marked the cluster free yet, no list operation involved */
                                                   >> 2343         for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++)
                                                   >> 2344                 inc_cluster_info_page(p, cluster_info, i);
                                                   >> 2345 
3246         if (nr_good_pages) {                     2346         if (nr_good_pages) {
3247                 swap_map[0] = SWAP_MAP_BAD;      2347                 swap_map[0] = SWAP_MAP_BAD;
3248                 si->max = maxpages;           !! 2348                 /*
3249                 si->pages = nr_good_pages;    !! 2349                  * Haven't marked the cluster free yet, no list
3250                 nr_extents = setup_swap_exten !! 2350                  * operation involved
                                                   >> 2351                  */
                                                   >> 2352                 inc_cluster_info_page(p, cluster_info, 0);
                                                   >> 2353                 p->max = maxpages;
                                                   >> 2354                 p->pages = nr_good_pages;
                                                   >> 2355                 nr_extents = setup_swap_extents(p, span);
3251                 if (nr_extents < 0)              2356                 if (nr_extents < 0)
3252                         return nr_extents;       2357                         return nr_extents;
3253                 nr_good_pages = si->pages;    !! 2358                 nr_good_pages = p->pages;
3254         }                                        2359         }
3255         if (!nr_good_pages) {                    2360         if (!nr_good_pages) {
3256                 pr_warn("Empty swap-file\n");    2361                 pr_warn("Empty swap-file\n");
3257                 return -EINVAL;                  2362                 return -EINVAL;
3258         }                                        2363         }
3259                                                  2364 
3260         return nr_extents;                    << 
3261 }                                             << 
3262                                               << 
3263 static struct swap_cluster_info *setup_cluste << 
3264                                               << 
3265                                               << 
3266 {                                             << 
3267         unsigned long nr_clusters = DIV_ROUND << 
3268         unsigned long col = si->cluster_next  << 
3269         struct swap_cluster_info *cluster_inf << 
3270         unsigned long i, j, k, idx;           << 
3271         int cpu, err = -ENOMEM;               << 
3272                                               << 
3273         cluster_info = kvcalloc(nr_clusters,  << 
3274         if (!cluster_info)                       2365         if (!cluster_info)
3275                 goto err;                     !! 2366                 return nr_extents;
3276                                               << 
3277         for (i = 0; i < nr_clusters; i++)     << 
3278                 spin_lock_init(&cluster_info[ << 
3279                                               << 
3280         si->cluster_next_cpu = alloc_percpu(u << 
3281         if (!si->cluster_next_cpu)            << 
3282                 goto err_free;                << 
3283                                               << 
3284         /* Random start position to help with << 
3285         for_each_possible_cpu(cpu)            << 
3286                 per_cpu(*si->cluster_next_cpu << 
3287                 get_random_u32_inclusive(1, s << 
3288                                               << 
3289         si->percpu_cluster = alloc_percpu(str << 
3290         if (!si->percpu_cluster)              << 
3291                 goto err_free;                << 
3292                                               << 
3293         for_each_possible_cpu(cpu) {          << 
3294                 struct percpu_cluster *cluste << 
3295                                               << 
3296                 cluster = per_cpu_ptr(si->per << 
3297                 for (i = 0; i < SWAP_NR_ORDER << 
3298                         cluster->next[i] = SW << 
3299         }                                     << 
3300                                               << 
3301         /*                                    << 
3302          * Mark unusable pages as unavailable << 
3303          * marked free yet, so no list operat << 
3304          *                                    << 
3305          * See setup_swap_map_and_extents():  << 
3306          * and the EOF part of the last clust << 
3307          */                                   << 
3308         inc_cluster_info_page(si, cluster_inf << 
3309         for (i = 0; i < swap_header->info.nr_ << 
3310                 inc_cluster_info_page(si, clu << 
3311                                       swap_he << 
3312         for (i = maxpages; i < round_up(maxpa << 
3313                 inc_cluster_info_page(si, clu << 
3314                                               << 
3315         INIT_LIST_HEAD(&si->free_clusters);   << 
3316         INIT_LIST_HEAD(&si->full_clusters);   << 
3317         INIT_LIST_HEAD(&si->discard_clusters) << 
3318                                               << 
3319         for (i = 0; i < SWAP_NR_ORDERS; i++)  << 
3320                 INIT_LIST_HEAD(&si->nonfull_c << 
3321                 INIT_LIST_HEAD(&si->frag_clus << 
3322                 si->frag_cluster_nr[i] = 0;   << 
3323         }                                     << 
3324                                                  2367 
3325         /*                                    !! 2368         for (i = 0; i < nr_clusters; i++) {
3326          * Reduce false cache line sharing be !! 2369                 if (!cluster_count(&cluster_info[idx])) {
3327          * sharing same address space.        !! 2370                         cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
3328          */                                   !! 2371                         if (cluster_is_null(&p->free_cluster_head)) {
3329         for (k = 0; k < SWAP_CLUSTER_COLS; k+ !! 2372                                 cluster_set_next_flag(&p->free_cluster_head,
3330                 j = (k + col) % SWAP_CLUSTER_ !! 2373                                                                 idx, 0);
3331                 for (i = 0; i < DIV_ROUND_UP( !! 2374                                 cluster_set_next_flag(&p->free_cluster_tail,
3332                         struct swap_cluster_i !! 2375                                                                 idx, 0);
3333                         idx = i * SWAP_CLUSTE !! 2376                         } else {
3334                         ci = cluster_info + i !! 2377                                 unsigned int tail;
3335                         if (idx >= nr_cluster !! 2378 
3336                                 continue;     !! 2379                                 tail = cluster_next(&p->free_cluster_tail);
3337                         if (ci->count) {      !! 2380                                 cluster_set_next(&cluster_info[tail], idx);
3338                                 ci->flags = C !! 2381                                 cluster_set_next_flag(&p->free_cluster_tail,
3339                                 list_add_tail !! 2382                                                                 idx, 0);
3340                                 continue;     << 
3341                         }                        2383                         }
3342                         ci->flags = CLUSTER_F << 
3343                         list_add_tail(&ci->li << 
3344                 }                                2384                 }
                                                   >> 2385                 idx++;
                                                   >> 2386                 if (idx == nr_clusters)
                                                   >> 2387                         idx = 0;
3345         }                                        2388         }
                                                   >> 2389         return nr_extents;
                                                   >> 2390 }
                                                   >> 2391 
                                                   >> 2392 /*
                                                   >> 2393  * Helper to sys_swapon determining if a given swap
                                                   >> 2394  * backing device queue supports DISCARD operations.
                                                   >> 2395  */
                                                   >> 2396 static bool swap_discardable(struct swap_info_struct *si)
                                                   >> 2397 {
                                                   >> 2398         struct request_queue *q = bdev_get_queue(si->bdev);
3346                                                  2399 
3347         return cluster_info;                  !! 2400         if (!q || !blk_queue_discard(q))
                                                   >> 2401                 return false;
3348                                                  2402 
3349 err_free:                                     !! 2403         return true;
3350         kvfree(cluster_info);                 << 
3351 err:                                          << 
3352         return ERR_PTR(err);                  << 
3353 }                                                2404 }
3354                                                  2405 
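The 4.4-only helper above, swap_discardable(), just asks the block layer whether the queue advertises discard; in 6.12 the equivalent test is inlined in the swapon path as bdev_max_discard_sectors(si->bdev). The same capability is exported through sysfs, so a rough userspace analogue of the check is:

#include <stdio.h>

/* 1 if the disk advertises discard, 0 if not, -1 on error (disk is e.g. "sda"). */
int device_discardable(const char *disk)
{
        char path[256];
        unsigned long long max_bytes = 0;
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/block/%s/queue/discard_max_bytes", disk);
        f = fopen(path, "r");
        if (!f)
                return -1;
        if (fscanf(f, "%llu", &max_bytes) != 1)
                max_bytes = 0;
        fclose(f);
        return max_bytes > 0;
}

A discard_max_bytes of zero is how the block layer reports "no discard support", matching the swap_discardable() == false case.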
3355 SYSCALL_DEFINE2(swapon, const char __user *,     2406 SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
3356 {                                                2407 {
3357         struct swap_info_struct *si;          !! 2408         struct swap_info_struct *p;
3358         struct filename *name;                   2409         struct filename *name;
3359         struct file *swap_file = NULL;           2410         struct file *swap_file = NULL;
3360         struct address_space *mapping;           2411         struct address_space *mapping;
3361         struct dentry *dentry;                << 
3362         int prio;                                2412         int prio;
3363         int error;                               2413         int error;
3364         union swap_header *swap_header;          2414         union swap_header *swap_header;
3365         int nr_extents;                          2415         int nr_extents;
3366         sector_t span;                           2416         sector_t span;
3367         unsigned long maxpages;                  2417         unsigned long maxpages;
3368         unsigned char *swap_map = NULL;          2418         unsigned char *swap_map = NULL;
3369         unsigned long *zeromap = NULL;        << 
3370         struct swap_cluster_info *cluster_inf    2419         struct swap_cluster_info *cluster_info = NULL;
3371         struct folio *folio = NULL;           !! 2420         unsigned long *frontswap_map = NULL;
                                                   >> 2421         struct page *page = NULL;
3372         struct inode *inode = NULL;              2422         struct inode *inode = NULL;
3373         bool inced_nr_rotate_swap = false;    << 
3374                                                  2423 
3375         if (swap_flags & ~SWAP_FLAGS_VALID)      2424         if (swap_flags & ~SWAP_FLAGS_VALID)
3376                 return -EINVAL;                  2425                 return -EINVAL;
3377                                                  2426 
3378         if (!capable(CAP_SYS_ADMIN))             2427         if (!capable(CAP_SYS_ADMIN))
3379                 return -EPERM;                   2428                 return -EPERM;
3380                                                  2429 
3381         if (!swap_avail_heads)                !! 2430         p = alloc_swap_info();
3382                 return -ENOMEM;               !! 2431         if (IS_ERR(p))
3383                                               !! 2432                 return PTR_ERR(p);
3384         si = alloc_swap_info();               << 
3385         if (IS_ERR(si))                       << 
3386                 return PTR_ERR(si);           << 
3387                                                  2433 
3388         INIT_WORK(&si->discard_work, swap_dis !! 2434         INIT_WORK(&p->discard_work, swap_discard_work);
3389         INIT_WORK(&si->reclaim_work, swap_rec << 
3390                                                  2435 
3391         name = getname(specialfile);             2436         name = getname(specialfile);
3392         if (IS_ERR(name)) {                      2437         if (IS_ERR(name)) {
3393                 error = PTR_ERR(name);           2438                 error = PTR_ERR(name);
3394                 name = NULL;                     2439                 name = NULL;
3395                 goto bad_swap;                   2440                 goto bad_swap;
3396         }                                        2441         }
3397         swap_file = file_open_name(name, O_RD !! 2442         swap_file = file_open_name(name, O_RDWR|O_LARGEFILE, 0);
3398         if (IS_ERR(swap_file)) {                 2443         if (IS_ERR(swap_file)) {
3399                 error = PTR_ERR(swap_file);      2444                 error = PTR_ERR(swap_file);
3400                 swap_file = NULL;                2445                 swap_file = NULL;
3401                 goto bad_swap;                   2446                 goto bad_swap;
3402         }                                        2447         }
3403                                                  2448 
3404         si->swap_file = swap_file;            !! 2449         p->swap_file = swap_file;
3405         mapping = swap_file->f_mapping;          2450         mapping = swap_file->f_mapping;
3406         dentry = swap_file->f_path.dentry;    << 
3407         inode = mapping->host;                   2451         inode = mapping->host;
3408                                                  2452 
3409         error = claim_swapfile(si, inode);    !! 2453         /* If S_ISREG(inode->i_mode), claim_swapfile() takes inode->i_mutex */
                                                   >> 2454         error = claim_swapfile(p, inode);
3410         if (unlikely(error))                     2455         if (unlikely(error))
3411                 goto bad_swap;                   2456                 goto bad_swap;
3412                                                  2457 
3413         inode_lock(inode);                    << 
3414         if (d_unlinked(dentry) || cant_mount( << 
3415                 error = -ENOENT;              << 
3416                 goto bad_swap_unlock_inode;   << 
3417         }                                     << 
3418         if (IS_SWAPFILE(inode)) {             << 
3419                 error = -EBUSY;               << 
3420                 goto bad_swap_unlock_inode;   << 
3421         }                                     << 
3422                                               << 
3423         /*                                       2458         /*
3424          * Read the swap header.                 2459          * Read the swap header.
3425          */                                      2460          */
3426         if (!mapping->a_ops->read_folio) {    !! 2461         if (!mapping->a_ops->readpage) {
3427                 error = -EINVAL;                 2462                 error = -EINVAL;
3428                 goto bad_swap_unlock_inode;   !! 2463                 goto bad_swap;
3429         }                                        2464         }
3430         folio = read_mapping_folio(mapping, 0 !! 2465         page = read_mapping_page(mapping, 0, swap_file);
3431         if (IS_ERR(folio)) {                  !! 2466         if (IS_ERR(page)) {
3432                 error = PTR_ERR(folio);       !! 2467                 error = PTR_ERR(page);
3433                 goto bad_swap_unlock_inode;   !! 2468                 goto bad_swap;
3434         }                                        2469         }
3435         swap_header = kmap_local_folio(folio, !! 2470         swap_header = kmap(page);
3436                                                  2471 
3437         maxpages = read_swap_header(si, swap_ !! 2472         maxpages = read_swap_header(p, swap_header, inode);
3438         if (unlikely(!maxpages)) {               2473         if (unlikely(!maxpages)) {
3439                 error = -EINVAL;                 2474                 error = -EINVAL;
3440                 goto bad_swap_unlock_inode;   !! 2475                 goto bad_swap;
3441         }                                        2476         }
3442                                                  2477 
3443         /* OK, set up the swap map and apply     2478         /* OK, set up the swap map and apply the bad block list */
3444         swap_map = vzalloc(maxpages);            2479         swap_map = vzalloc(maxpages);
3445         if (!swap_map) {                         2480         if (!swap_map) {
3446                 error = -ENOMEM;                 2481                 error = -ENOMEM;
3447                 goto bad_swap_unlock_inode;   !! 2482                 goto bad_swap;
3448         }                                        2483         }
                                                   >> 2484         if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
                                                   >> 2485                 int cpu;
3449                                                  2486 
3450         error = swap_cgroup_swapon(si->type,  !! 2487                 p->flags |= SWP_SOLIDSTATE;
3451         if (error)                            !! 2488                 /*
3452                 goto bad_swap_unlock_inode;   !! 2489                  * select a random position to start with to help wear leveling
3453                                               !! 2490                  * SSD
3454         nr_extents = setup_swap_map_and_exten !! 2491                  */
3455                                               !! 2492                 p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
3456         if (unlikely(nr_extents < 0)) {       << 
3457                 error = nr_extents;           << 
3458                 goto bad_swap_unlock_inode;   << 
3459         }                                     << 
3460                                                  2493 
3461         /*                                    !! 2494                 cluster_info = vzalloc(DIV_ROUND_UP(maxpages,
3462          * Use kvmalloc_array instead of bitm !! 2495                         SWAPFILE_CLUSTER) * sizeof(*cluster_info));
3463          * be above MAX_PAGE_ORDER in case of !! 2496                 if (!cluster_info) {
3464          */                                   !! 2497                         error = -ENOMEM;
3465         zeromap = kvmalloc_array(BITS_TO_LONG !! 2498                         goto bad_swap;
3466                                     GFP_KERNE !! 2499                 }
3467         if (!zeromap) {                       !! 2500                 p->percpu_cluster = alloc_percpu(struct percpu_cluster);
3468                 error = -ENOMEM;              !! 2501                 if (!p->percpu_cluster) {
3469                 goto bad_swap_unlock_inode;   !! 2502                         error = -ENOMEM;
                                                   >> 2503                         goto bad_swap;
                                                   >> 2504                 }
                                                   >> 2505                 for_each_possible_cpu(cpu) {
                                                   >> 2506                         struct percpu_cluster *cluster;
                                                   >> 2507                         cluster = per_cpu_ptr(p->percpu_cluster, cpu);
                                                   >> 2508                         cluster_set_null(&cluster->index);
                                                   >> 2509                 }
3470         }                                        2510         }
3471                                                  2511 
3472         if (si->bdev && bdev_stable_writes(si !! 2512         error = swap_cgroup_swapon(p->type, maxpages);
3473                 si->flags |= SWP_STABLE_WRITE !! 2513         if (error)
3474                                               !! 2514                 goto bad_swap;
3475         if (si->bdev && bdev_synchronous(si-> << 
3476                 si->flags |= SWP_SYNCHRONOUS_ << 
3477                                               << 
3478         if (si->bdev && bdev_nonrot(si->bdev) << 
3479                 si->flags |= SWP_SOLIDSTATE;  << 
3480                                                  2515 
3481                 cluster_info = setup_clusters !! 2516         nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
3482                 if (IS_ERR(cluster_info)) {   !! 2517                 cluster_info, maxpages, &span);
3483                         error = PTR_ERR(clust !! 2518         if (unlikely(nr_extents < 0)) {
3484                         cluster_info = NULL;  !! 2519                 error = nr_extents;
3485                         goto bad_swap_unlock_ !! 2520                 goto bad_swap;
3486                 }                             << 
3487         } else {                              << 
3488                 atomic_inc(&nr_rotate_swap);  << 
3489                 inced_nr_rotate_swap = true;  << 
3490         }                                        2521         }
                                                   >> 2522         /* frontswap enabled? set up bit-per-page map for frontswap */
                                                   >> 2523         if (frontswap_enabled)
                                                   >> 2524                 frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long));
3491                                                  2525 
3492         if ((swap_flags & SWAP_FLAG_DISCARD)  !! 2526         if (p->bdev && (swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
3493             si->bdev && bdev_max_discard_sect << 
3494                 /*                               2527                 /*
3495                  * When discard is enabled fo    2528                  * When discard is enabled for swap with no particular
3496                  * policy flagged, we set all    2529                  * policy flagged, we set all swap discard flags here in
3497                  * order to sustain backward     2530                  * order to sustain backward compatibility with older
3498                  * swapon(8) releases.           2531                  * swapon(8) releases.
3499                  */                              2532                  */
3500                 si->flags |= (SWP_DISCARDABLE !! 2533                 p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD |
3501                              SWP_PAGE_DISCARD    2534                              SWP_PAGE_DISCARD);
3502                                                  2535 
3503                 /*                               2536                 /*
3504                  * By flagging sys_swapon, a     2537                  * By flagging sys_swapon, a sysadmin can tell us to
3505                  * either do single-time area    2538                  * either do single-time area discards only, or to just
3506                  * perform discards for relea    2539                  * perform discards for released swap page-clusters.
3507                  * Now it's time to adjust th    2540                  * Now it's time to adjust the p->flags accordingly.
3508                  */                              2541                  */
3509                 if (swap_flags & SWAP_FLAG_DI    2542                 if (swap_flags & SWAP_FLAG_DISCARD_ONCE)
3510                         si->flags &= ~SWP_PAG !! 2543                         p->flags &= ~SWP_PAGE_DISCARD;
3511                 else if (swap_flags & SWAP_FL    2544                 else if (swap_flags & SWAP_FLAG_DISCARD_PAGES)
3512                         si->flags &= ~SWP_ARE !! 2545                         p->flags &= ~SWP_AREA_DISCARD;
3513                                                  2546 
3514                 /* issue a swapon-time discar    2547                 /* issue a swapon-time discard if it's still required */
3515                 if (si->flags & SWP_AREA_DISC !! 2548                 if (p->flags & SWP_AREA_DISCARD) {
3516                         int err = discard_swa !! 2549                         int err = discard_swap(p);
3517                         if (unlikely(err))       2550                         if (unlikely(err))
3518                                 pr_err("swapo    2551                                 pr_err("swapon: discard_swap(%p): %d\n",
3519                                         si, e !! 2552                                         p, err);
3520                 }                                2553                 }
3521         }                                        2554         }
3522                                                  2555 
3523         error = init_swap_address_space(si->t << 
3524         if (error)                            << 
3525                 goto bad_swap_unlock_inode;   << 
3526                                               << 
3527         error = zswap_swapon(si->type, maxpag << 
3528         if (error)                            << 
3529                 goto free_swap_address_space; << 
3530                                               << 
3531         /*                                    << 
3532          * Flush any pending IO and dirty map << 
3533          * swap device.                       << 
3534          */                                   << 
3535         inode->i_flags |= S_SWAPFILE;         << 
3536         error = inode_drain_writes(inode);    << 
3537         if (error) {                          << 
3538                 inode->i_flags &= ~S_SWAPFILE << 
3539                 goto free_swap_zswap;         << 
3540         }                                     << 
3541                                               << 
3542         mutex_lock(&swapon_mutex);               2556         mutex_lock(&swapon_mutex);
3543         prio = -1;                               2557         prio = -1;
3544         if (swap_flags & SWAP_FLAG_PREFER)       2558         if (swap_flags & SWAP_FLAG_PREFER)
3545                 prio =                           2559                 prio =
3546                   (swap_flags & SWAP_FLAG_PRI    2560                   (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
3547         enable_swap_info(si, prio, swap_map,  !! 2561         enable_swap_info(p, prio, swap_map, cluster_info, frontswap_map);
3548                                                  2562 
3549         pr_info("Adding %uk swap on %s.  Prio !! 2563         pr_info("Adding %uk swap on %s.  "
3550                 K(si->pages), name->name, si- !! 2564                         "Priority:%d extents:%d across:%lluk %s%s%s%s%s\n",
3551                 K((unsigned long long)span),  !! 2565                 p->pages<<(PAGE_SHIFT-10), name->name, p->prio,
3552                 (si->flags & SWP_SOLIDSTATE)  !! 2566                 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
3553                 (si->flags & SWP_DISCARDABLE) !! 2567                 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
3554                 (si->flags & SWP_AREA_DISCARD !! 2568                 (p->flags & SWP_DISCARDABLE) ? "D" : "",
3555                 (si->flags & SWP_PAGE_DISCARD !! 2569                 (p->flags & SWP_AREA_DISCARD) ? "s" : "",
                                                   >> 2570                 (p->flags & SWP_PAGE_DISCARD) ? "c" : "",
                                                   >> 2571                 (frontswap_map) ? "FS" : "");
3556                                                  2572 
3557         mutex_unlock(&swapon_mutex);             2573         mutex_unlock(&swapon_mutex);
3558         atomic_inc(&proc_poll_event);            2574         atomic_inc(&proc_poll_event);
3559         wake_up_interruptible(&proc_poll_wait    2575         wake_up_interruptible(&proc_poll_wait);
3560                                                  2576 
                                                   >> 2577         if (S_ISREG(inode->i_mode))
                                                   >> 2578                 inode->i_flags |= S_SWAPFILE;
3561         error = 0;                               2579         error = 0;
3562         goto out;                                2580         goto out;
3563 free_swap_zswap:                              << 
3564         zswap_swapoff(si->type);              << 
3565 free_swap_address_space:                      << 
3566         exit_swap_address_space(si->type);    << 
3567 bad_swap_unlock_inode:                        << 
3568         inode_unlock(inode);                  << 
3569 bad_swap:                                        2581 bad_swap:
3570         free_percpu(si->percpu_cluster);      !! 2582         free_percpu(p->percpu_cluster);
3571         si->percpu_cluster = NULL;            !! 2583         p->percpu_cluster = NULL;
3572         free_percpu(si->cluster_next_cpu);    !! 2584         if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
3573         si->cluster_next_cpu = NULL;          !! 2585                 set_blocksize(p->bdev, p->old_block_size);
3574         inode = NULL;                         !! 2586                 blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3575         destroy_swap_extents(si);             !! 2587         }
3576         swap_cgroup_swapoff(si->type);        !! 2588         destroy_swap_extents(p);
                                                   >> 2589         swap_cgroup_swapoff(p->type);
3577         spin_lock(&swap_lock);                   2590         spin_lock(&swap_lock);
3578         si->swap_file = NULL;                 !! 2591         p->swap_file = NULL;
3579         si->flags = 0;                        !! 2592         p->flags = 0;
3580         spin_unlock(&swap_lock);                 2593         spin_unlock(&swap_lock);
3581         vfree(swap_map);                         2594         vfree(swap_map);
3582         kvfree(zeromap);                      !! 2595         vfree(cluster_info);
3583         kvfree(cluster_info);                 !! 2596         if (swap_file) {
3584         if (inced_nr_rotate_swap)             !! 2597                 if (inode && S_ISREG(inode->i_mode)) {
3585                 atomic_dec(&nr_rotate_swap);  !! 2598                         mutex_unlock(&inode->i_mutex);
3586         if (swap_file)                        !! 2599                         inode = NULL;
                                                   >> 2600                 }
3587                 filp_close(swap_file, NULL);     2601                 filp_close(swap_file, NULL);
                                                   >> 2602         }
3588 out:                                             2603 out:
3589         if (!IS_ERR_OR_NULL(folio))           !! 2604         if (page && !IS_ERR(page)) {
3590                 folio_release_kmap(folio, swa !! 2605                 kunmap(page);
                                                   >> 2606                 page_cache_release(page);
                                                   >> 2607         }
3591         if (name)                                2608         if (name)
3592                 putname(name);                   2609                 putname(name);
3593         if (inode)                            !! 2610         if (inode && S_ISREG(inode->i_mode))
3594                 inode_unlock(inode);          !! 2611                 mutex_unlock(&inode->i_mutex);
3595         if (!error)                           << 
3596                 enable_swap_slots_cache();    << 
3597         return error;                            2612         return error;
3598 }                                                2613 }
3599                                                  2614 
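Note how sys_swapon() decodes the priority: only when SWAP_FLAG_PREFER is set does it extract (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT, otherwise prio stays at -1 and a default is assigned later. Userspace packs the value the same way through <sys/swap.h>; a minimal caller, where /swapfile is a placeholder path that mkswap(8) must already have formatted:

#include <stdio.h>
#include <sys/swap.h>

int main(void)
{
        int prio = 10;

        /* requires CAP_SYS_ADMIN and a valid SWAPSPACE2 header */
        if (swapon("/swapfile",
                   SWAP_FLAG_PREFER |
                   ((prio << SWAP_FLAG_PRIO_SHIFT) & SWAP_FLAG_PRIO_MASK)) != 0) {
                perror("swapon");
                return 1;
        }
        return 0;
}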
3600 void si_swapinfo(struct sysinfo *val)            2615 void si_swapinfo(struct sysinfo *val)
3601 {                                                2616 {
3602         unsigned int type;                       2617         unsigned int type;
3603         unsigned long nr_to_be_unused = 0;       2618         unsigned long nr_to_be_unused = 0;
3604                                                  2619 
3605         spin_lock(&swap_lock);                   2620         spin_lock(&swap_lock);
3606         for (type = 0; type < nr_swapfiles; t    2621         for (type = 0; type < nr_swapfiles; type++) {
3607                 struct swap_info_struct *si =    2622                 struct swap_info_struct *si = swap_info[type];
3608                                                  2623 
3609                 if ((si->flags & SWP_USED) &&    2624                 if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK))
3610                         nr_to_be_unused += RE !! 2625                         nr_to_be_unused += si->inuse_pages;
3611         }                                        2626         }
3612         val->freeswap = atomic_long_read(&nr_    2627         val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;
3613         val->totalswap = total_swap_pages + n    2628         val->totalswap = total_swap_pages + nr_to_be_unused;
3614         spin_unlock(&swap_lock);                 2629         spin_unlock(&swap_lock);
3615 }                                                2630 }
3616                                                  2631 
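si_swapinfo() is what feeds the swap totals behind sysinfo(2) and /proc/meminfo; devices in mid-swapoff (SWP_USED set but SWP_WRITEOK clear) have their still-in-use pages folded back into both totals so the numbers stay consistent while unuse runs. Reading the same figures from userspace:

#include <stdio.h>
#include <sys/sysinfo.h>

int main(void)
{
        struct sysinfo si;

        if (sysinfo(&si) != 0) {
                perror("sysinfo");
                return 1;
        }
        /* all sizes are counted in units of si.mem_unit bytes */
        printf("SwapTotal: %llu kB\n",
               (unsigned long long)si.totalswap * si.mem_unit / 1024);
        printf("SwapFree:  %llu kB\n",
               (unsigned long long)si.freeswap * si.mem_unit / 1024);
        return 0;
}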
3617 /*                                               2632 /*
3618  * Verify that nr swap entries are valid and  !! 2633  * Verify that a swap entry is valid and increment its swap map count.
3619  *                                               2634  *
3620  * Returns error code in following case.         2635  * Returns error code in following case.
3621  * - success -> 0                                2636  * - success -> 0
3622  * - swp_entry is invalid -> EINVAL              2637  * - swp_entry is invalid -> EINVAL
3623  * - swp_entry is migration entry -> EINVAL      2638  * - swp_entry is migration entry -> EINVAL
3624  * - swap-cache reference is requested but th    2639  * - swap-cache reference is requested but there is already one. -> EEXIST
3625  * - swap-cache reference is requested but th    2640  * - swap-cache reference is requested but the entry is not used. -> ENOENT
3626  * - swap-mapped reference requested but need    2641  * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
3627  */                                              2642  */
3628 static int __swap_duplicate(swp_entry_t entry !! 2643 static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
3629 {                                                2644 {
3630         struct swap_info_struct *si;          !! 2645         struct swap_info_struct *p;
3631         struct swap_cluster_info *ci;         !! 2646         unsigned long offset, type;
3632         unsigned long offset;                 << 
3633         unsigned char count;                     2647         unsigned char count;
3634         unsigned char has_cache;                 2648         unsigned char has_cache;
3635         int err, i;                           !! 2649         int err = -EINVAL;
3636                                                  2650 
3637         si = swp_swap_info(entry);            !! 2651         if (non_swap_entry(entry))
                                                   >> 2652                 goto out;
3638                                                  2653 
                                                   >> 2654         type = swp_type(entry);
                                                   >> 2655         if (type >= nr_swapfiles)
                                                   >> 2656                 goto bad_file;
                                                   >> 2657         p = swap_info[type];
3639         offset = swp_offset(entry);              2658         offset = swp_offset(entry);
3640         VM_WARN_ON(nr > SWAPFILE_CLUSTER - of << 
3641         VM_WARN_ON(usage == 1 && nr > 1);     << 
3642         ci = lock_cluster_or_swap_info(si, of << 
3643                                               << 
3644         err = 0;                              << 
3645         for (i = 0; i < nr; i++) {            << 
3646                 count = si->swap_map[offset + << 
3647                                                  2659 
3648                 /*                            !! 2660         spin_lock(&p->lock);
3649                  * swapin_readahead() doesn't !! 2661         if (unlikely(offset >= p->max))
3650                  * swap entry could be SWAP_M !! 2662                 goto unlock_out;
3651                  */                           << 
3652                 if (unlikely(swap_count(count << 
3653                         err = -ENOENT;        << 
3654                         goto unlock_out;      << 
3655                 }                             << 
3656                                               << 
3657                 has_cache = count & SWAP_HAS_ << 
3658                 count &= ~SWAP_HAS_CACHE;     << 
3659                                                  2663 
3660                 if (!count && !has_cache) {   !! 2664         count = p->swap_map[offset];
3661                         err = -ENOENT;        << 
3662                 } else if (usage == SWAP_HAS_ << 
3663                         if (has_cache)        << 
3664                                 err = -EEXIST << 
3665                 } else if ((count & ~COUNT_CO << 
3666                         err = -EINVAL;        << 
3667                 }                             << 
3668                                                  2665 
3669                 if (err)                      !! 2666         /*
3670                         goto unlock_out;      !! 2667          * swapin_readahead() doesn't check if a swap entry is valid, so the
                                                   >> 2668          * swap entry could be SWAP_MAP_BAD. Check here with lock held.
                                                   >> 2669          */
                                                   >> 2670         if (unlikely(swap_count(count) == SWAP_MAP_BAD)) {
                                                   >> 2671                 err = -ENOENT;
                                                   >> 2672                 goto unlock_out;
3671         }                                        2673         }
3672                                                  2674 
3673         for (i = 0; i < nr; i++) {            !! 2675         has_cache = count & SWAP_HAS_CACHE;
3674                 count = si->swap_map[offset + !! 2676         count &= ~SWAP_HAS_CACHE;
3675                 has_cache = count & SWAP_HAS_ !! 2677         err = 0;
3676                 count &= ~SWAP_HAS_CACHE;     !! 2678 
                                                   >> 2679         if (usage == SWAP_HAS_CACHE) {
3677                                                  2680 
3678                 if (usage == SWAP_HAS_CACHE)  !! 2681                 /* set SWAP_HAS_CACHE if there is no cache and entry is used */
                                                   >> 2682                 if (!has_cache && count)
3679                         has_cache = SWAP_HAS_    2683                         has_cache = SWAP_HAS_CACHE;
3680                 else if ((count & ~COUNT_CONT !! 2684                 else if (has_cache)             /* someone else added cache */
                                                   >> 2685                         err = -EEXIST;
                                                   >> 2686                 else                            /* no users remaining */
                                                   >> 2687                         err = -ENOENT;
                                                   >> 2688 
                                                   >> 2689         } else if (count || has_cache) {
                                                   >> 2690 
                                                   >> 2691                 if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
3681                         count += usage;          2692                         count += usage;
3682                 else if (swap_count_continued !! 2693                 else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX)
                                                   >> 2694                         err = -EINVAL;
                                                   >> 2695                 else if (swap_count_continued(p, offset, count))
3683                         count = COUNT_CONTINU    2696                         count = COUNT_CONTINUED;
3684                 else {                        !! 2697                 else
3685                         /*                    << 
3686                          * Don't need to roll << 
3687                          * usage == 1, there  << 
3688                          */                   << 
3689                         err = -ENOMEM;           2698                         err = -ENOMEM;
3690                         goto unlock_out;      !! 2699         } else
3691                 }                             !! 2700                 err = -ENOENT;                  /* unused swap entry */
3692                                                  2701 
3693                 WRITE_ONCE(si->swap_map[offset + i], count | has_cache);  !! 2702         p->swap_map[offset] = count | has_cache;
3694         }                                     << 
3695                                                  2703 
3696 unlock_out:                                      2704 unlock_out:
3697         unlock_cluster_or_swap_info(si, ci);  !! 2705         spin_unlock(&p->lock);
                                                   >> 2706 out:
3698         return err;                              2707         return err;
                                                   >> 2708 
                                                   >> 2709 bad_file:
                                                   >> 2710         pr_err("swap_dup: %s%08lx\n", Bad_file, entry.val);
                                                   >> 2711         goto out;
3699 }                                                2712 }
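
A note on the state being updated above: each swap entry's state is a single byte in swap_map. Bit 0x40 (SWAP_HAS_CACHE) flags that a swap-cache folio owns the slot, bit 0x80 (COUNT_CONTINUED) flags that the count overflows into continuation pages, and the low bits hold the reference count itself, up to SWAP_MAP_MAX (0x3e), with SWAP_MAP_BAD (0x3f) marking unusable slots. The following stand-alone sketch models the update rules for one byte; the constants match include/linux/swap.h, but map_dup() is an illustrative helper, not a kernel function, and the continuation path is reduced to an -ENOMEM result.

    #include <errno.h>
    #include <stdio.h>

    /* Constants as defined in include/linux/swap.h */
    #define SWAP_HAS_CACHE  0x40    /* slot is backed by a swap-cache page */
    #define COUNT_CONTINUED 0x80    /* count continues in continuation pages */
    #define SWAP_MAP_MAX    0x3e    /* max count held in swap_map itself */
    #define SWAP_MAP_BAD    0x3f    /* bad page slot */

    /*
     * Model of one swap_map byte update (usage == 1 or SWAP_HAS_CACHE),
     * with the continuation path reduced to an -ENOMEM result.
     */
    static int map_dup(unsigned char *map, unsigned char usage)
    {
            unsigned char count = *map;
            unsigned char has_cache = count & SWAP_HAS_CACHE;

            count &= ~SWAP_HAS_CACHE;
            if (count == SWAP_MAP_BAD)
                    return -ENOENT;         /* bad slot */
            if (!count && !has_cache)
                    return -ENOENT;         /* unused entry */

            if (usage == SWAP_HAS_CACHE) {
                    if (has_cache)
                            return -EEXIST; /* someone else added cache */
                    has_cache = SWAP_HAS_CACHE;
            } else if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX) {
                    count += usage;
            } else {
                    return -ENOMEM;         /* would need a continuation page */
            }

            *map = count | has_cache;
            return 0;
    }

    int main(void)
    {
            unsigned char map = 1;          /* one pte references the entry */

            printf("dup: %d, map=%#x\n", map_dup(&map, 1), map);
            printf("cache: %d, map=%#x\n", map_dup(&map, SWAP_HAS_CACHE), map);
            printf("cache again: %d\n", map_dup(&map, SWAP_HAS_CACHE));
            return 0;
    }
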
3700                                                  2713 
3701 /*                                               2714 /*
3702  * Help swapoff by noting that swap entry belongs to shmem/tmpfs    2715  * Help swapoff by noting that swap entry belongs to shmem/tmpfs
3703  * (in which case its reference count is never incremented).        2716  * (in which case its reference count is never incremented).
3704  */                                              2717  */
3705 void swap_shmem_alloc(swp_entry_t entry, int nr)  !! 2718 void swap_shmem_alloc(swp_entry_t entry)
3706 {                                                2719 {
3707         __swap_duplicate(entry, SWAP_MAP_SHMEM, nr);  !! 2720         __swap_duplicate(entry, SWAP_MAP_SHMEM);
3708 }                                                2721 }
3709                                                  2722 
3710 /*                                               2723 /*
3711  * Increase reference count of swap entry by 1.                                 2724  * Increase reference count of swap entry by 1.
3712  * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required   2725  * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required
3713  * but could not be atomically allocated.  Returns 0, just as if it succeeded,  2726  * but could not be atomically allocated.  Returns 0, just as if it succeeded,
3714  * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which   2727  * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which
3715  * might occur if a page table entry has got corrupted.                         2728  * might occur if a page table entry has got corrupted.
3716  */                                              2729  */
3717 int swap_duplicate(swp_entry_t entry)            2730 int swap_duplicate(swp_entry_t entry)
3718 {                                                2731 {
3719         int err = 0;                             2732         int err = 0;
3720                                                  2733 
3721         while (!err && __swap_duplicate(entry, 1, 1) == -ENOMEM)  !! 2734         while (!err && __swap_duplicate(entry, 1) == -ENOMEM)
3722                 err = add_swap_count_continua    2735                 err = add_swap_count_continuation(entry, GFP_ATOMIC);
3723         return err;                              2736         return err;
3724 }                                                2737 }
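
The retry loop above is half of a two-phase convention: swap_duplicate() may run with page table locks held, so the continuation page is first attempted with GFP_ATOMIC; if that fails, the caller is expected to drop its locks and call add_swap_count_continuation() again with GFP_KERNEL. A sketch of that shape, modeled loosely on the fork path in mm/memory.c (the function and its control flow are illustrative, not a verbatim kernel path):

    static int dup_swap_pte(swp_entry_t entry)
    {
            swp_entry_t pending = { .val = 0 };

            /* Phase 1: under the page table lock, atomic context only. */
            if (swap_duplicate(entry) < 0)  /* -ENOMEM: continuation needed */
                    pending = entry;

            /* ... pte_unmap_unlock() happens here; sleeping is allowed ... */

            /* Phase 2: retry the continuation allocation with GFP_KERNEL. */
            if (pending.val &&
                add_swap_count_continuation(pending, GFP_KERNEL) < 0)
                    return -ENOMEM;         /* genuinely out of memory */

            return 0;                       /* caller re-walks and retries */
    }
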
3725                                                  2738 
3726 /*                                               2739 /*
3727  * @entry: first swap entry from which we allocate nr swap cache.  !! 2740  * @entry: swap entry for which we allocate swap cache.
3728  *                                               2741  *
3729  * Called when allocating swap cache for existing swap entries,  !! 2742  * Called when allocating swap cache for existing swap entry,
3730  * This can return error codes. Returns 0 at success.               2743  * This can return error codes. Returns 0 at success.
3731  * -EEXIST means there is a swap cache.       !! 2744  * -EBUSY means there is a swap cache.
3732  * Note: return code is different from swap_d    2745  * Note: return code is different from swap_duplicate().
3733  */                                              2746  */
3734 int swapcache_prepare(swp_entry_t entry, int nr)  !! 2747 int swapcache_prepare(swp_entry_t entry)
3735 {                                                2748 {
3736         return __swap_duplicate(entry, SWAP_HAS_CACHE, nr);  !! 2749         return __swap_duplicate(entry, SWAP_HAS_CACHE);
3737 }                                                2750 }
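
Because swapcache_prepare() reports -EEXIST when another task already holds (or is installing) the swap cache for these entries, callers typically treat that code as "back off and retry" rather than as failure, while -ENOENT means the entries are gone. An illustrative loop (the backoff policy shown is a placeholder, not a specific kernel path):

    for (;;) {
            int err = swapcache_prepare(entry, 1);

            if (!err)
                    break;          /* we now own SWAP_HAS_CACHE */
            if (err != -EEXIST)
                    return err;     /* -ENOENT etc.: give up */
            /* racing swapin owns the slot: yield, then retry */
            cond_resched();
    }
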
3738                                                  2751 
3739 void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)  !! 2752 struct swap_info_struct *page_swap_info(struct page *page)
3740 {                                                2753 {
3741         unsigned long offset = swp_offset(entry);  !! 2754         swp_entry_t swap = { .val = page_private(page) };
3742                                               !! 2755         BUG_ON(!PageSwapCache(page));
3743         cluster_swap_free_nr(si, offset, nr, SWAP_HAS_CACHE);  !! 2756         return swap_info[swp_type(swap)];
3744 }                                             << 
3745                                               << 
3746 struct swap_info_struct *swp_swap_info(swp_entry_t entry)  << 
3747 {                                             << 
3748         return swap_type_to_swap_info(swp_type(entry));  << 
3749 }                                                2757 }
3750                                                  2758 
3751 /*                                               2759 /*
3752  * out-of-line methods to avoid include hell. !! 2760  * out-of-line __page_file_ methods to avoid include hell.
3753  */                                              2761  */
3754 struct address_space *swapcache_mapping(struct folio *folio)  !! 2762 struct address_space *__page_file_mapping(struct page *page)
3755 {                                                2763 {
3756         return swp_swap_info(folio->swap)->swap_file->f_mapping;  !! 2764         VM_BUG_ON_PAGE(!PageSwapCache(page), page);
                                                   >> 2765         return page_swap_info(page)->swap_file->f_mapping;
3757 }                                                2766 }
3758 EXPORT_SYMBOL_GPL(swapcache_mapping);         !! 2767 EXPORT_SYMBOL_GPL(__page_file_mapping);
3759                                                  2768 
3760 pgoff_t __folio_swap_cache_index(struct folio *folio)  !! 2769 pgoff_t __page_file_index(struct page *page)
3761 {                                                2770 {
3762         return swap_cache_index(folio->swap); !! 2771         swp_entry_t swap = { .val = page_private(page) };
                                                   >> 2772         VM_BUG_ON_PAGE(!PageSwapCache(page), page);
                                                   >> 2773         return swp_offset(swap);
3763 }                                                2774 }
3764 EXPORT_SYMBOL_GPL(__folio_swap_cache_index);  !! 2775 EXPORT_SYMBOL_GPL(__page_file_index);
3765                                                  2776 
3766 /*                                               2777 /*
3767  * add_swap_count_continuation - called when     2778  * add_swap_count_continuation - called when a swap count is duplicated
3768  * beyond SWAP_MAP_MAX, it allocates a new pa    2779  * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
3769  * page of the original vmalloc'ed swap_map,     2780  * page of the original vmalloc'ed swap_map, to hold the continuation count
3770  * (for that entry and for its neighbouring P    2781  * (for that entry and for its neighbouring PAGE_SIZE swap entries).  Called
3771  * again when count is duplicated beyond SWAP    2782  * again when count is duplicated beyond SWAP_MAP_MAX * SWAP_CONT_MAX, etc.
3772  *                                               2783  *
3773  * These continuation pages are seldom refere    2784  * These continuation pages are seldom referenced: the common paths all work
3774  * on the original swap_map, only referring t    2785  * on the original swap_map, only referring to a continuation page when the
3775  * low "digit" of a count is incremented or d    2786  * low "digit" of a count is incremented or decremented through SWAP_MAP_MAX.
3776  *                                               2787  *
3777  * add_swap_count_continuation(, GFP_ATOMIC)     2788  * add_swap_count_continuation(, GFP_ATOMIC) can be called while holding
3778  * page table locks; if it fails, add_swap_co    2789  * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL)
3779  * can be called after dropping locks.           2790  * can be called after dropping locks.
3780  */                                              2791  */
3781 int add_swap_count_continuation(swp_entry_t e    2792 int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
3782 {                                                2793 {
3783         struct swap_info_struct *si;             2794         struct swap_info_struct *si;
3784         struct swap_cluster_info *ci;         << 
3785         struct page *head;                       2795         struct page *head;
3786         struct page *page;                       2796         struct page *page;
3787         struct page *list_page;                  2797         struct page *list_page;
3788         pgoff_t offset;                          2798         pgoff_t offset;
3789         unsigned char count;                     2799         unsigned char count;
3790         int ret = 0;                          << 
3791                                                  2800 
3792         /*                                       2801         /*
3793          * When debugging, it's easier to use    2802          * When debugging, it's easier to use __GFP_ZERO here; but it's better
3794          * for latency not to zero a page whi    2803          * for latency not to zero a page while GFP_ATOMIC and holding locks.
3795          */                                      2804          */
3796         page = alloc_page(gfp_mask | __GFP_HI    2805         page = alloc_page(gfp_mask | __GFP_HIGHMEM);
3797                                                  2806 
3798         si = get_swap_device(entry);          !! 2807         si = swap_info_get(entry);
3799         if (!si) {                               2808         if (!si) {
3800                 /*                               2809                 /*
3801                  * An acceptable race has occ    2810                  * An acceptable race has occurred since the failing
3802                  * __swap_duplicate(): the swap device may be swapoff  !! 2811                  * __swap_duplicate(): the swap entry has been freed,
                                                   >> 2812                  * perhaps even the whole swap_map cleared for swapoff.
3803                  */                              2813                  */
3804                 goto outer;                      2814                 goto outer;
3805         }                                        2815         }
3806         spin_lock(&si->lock);                 << 
3807                                                  2816 
3808         offset = swp_offset(entry);              2817         offset = swp_offset(entry);
3809                                               !! 2818         count = si->swap_map[offset] & ~SWAP_HAS_CACHE;
3810         ci = lock_cluster(si, offset);        << 
3811                                               << 
3812         count = swap_count(si->swap_map[offset]);  << 
3813                                                  2819 
3814         if ((count & ~COUNT_CONTINUED) != SWA    2820         if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
3815                 /*                               2821                 /*
3816                  * The higher the swap count,    2822                  * The higher the swap count, the more likely it is that tasks
3817                  * will race to add swap coun    2823                  * will race to add swap count continuation: we need to avoid
3818                  * over-provisioning.            2824                  * over-provisioning.
3819                  */                              2825                  */
3820                 goto out;                        2826                 goto out;
3821         }                                        2827         }
3822                                                  2828 
3823         if (!page) {                             2829         if (!page) {
3824                 ret = -ENOMEM;                !! 2830                 spin_unlock(&si->lock);
3825                 goto out;                     !! 2831                 return -ENOMEM;
3826         }                                        2832         }
3827                                                  2833 
                                                   >> 2834         /*
                                                   >> 2835          * We are fortunate that although vmalloc_to_page uses pte_offset_map,
                                                   >> 2836          * no architecture is using highmem pages for kernel page tables: so it
                                                   >> 2837          * will not corrupt the GFP_ATOMIC caller's atomic page table kmaps.
                                                   >> 2838          */
3828         head = vmalloc_to_page(si->swap_map +    2839         head = vmalloc_to_page(si->swap_map + offset);
3829         offset &= ~PAGE_MASK;                    2840         offset &= ~PAGE_MASK;
3830                                                  2841 
3831         spin_lock(&si->cont_lock);            << 
3832         /*                                       2842         /*
3833          * Page allocation does not initializ    2843          * Page allocation does not initialize the page's lru field,
3834          * but it does always reset its priva    2844          * but it does always reset its private field.
3835          */                                      2845          */
3836         if (!page_private(head)) {               2846         if (!page_private(head)) {
3837                 BUG_ON(count & COUNT_CONTINUE    2847                 BUG_ON(count & COUNT_CONTINUED);
3838                 INIT_LIST_HEAD(&head->lru);      2848                 INIT_LIST_HEAD(&head->lru);
3839                 set_page_private(head, SWP_CO    2849                 set_page_private(head, SWP_CONTINUED);
3840                 si->flags |= SWP_CONTINUED;      2850                 si->flags |= SWP_CONTINUED;
3841         }                                        2851         }
3842                                                  2852 
3843         list_for_each_entry(list_page, &head-    2853         list_for_each_entry(list_page, &head->lru, lru) {
3844                 unsigned char *map;              2854                 unsigned char *map;
3845                                                  2855 
3846                 /*                               2856                 /*
3847                  * If the previous map said n    2857                  * If the previous map said no continuation, but we've found
3848                  * a continuation page, free     2858                  * a continuation page, free our allocation and use this one.
3849                  */                              2859                  */
3850                 if (!(count & COUNT_CONTINUED    2860                 if (!(count & COUNT_CONTINUED))
3851                         goto out_unlock_cont; !! 2861                         goto out;
3852                                                  2862 
3853                 map = kmap_local_page(list_page) + offset;  !! 2863                 map = kmap_atomic(list_page) + offset;
3854                 count = *map;                    2864                 count = *map;
3855                 kunmap_local(map);            !! 2865                 kunmap_atomic(map);
3856                                                  2866 
3857                 /*                               2867                 /*
3858                  * If this continuation count    2868                  * If this continuation count now has some space in it,
3859                  * free our allocation and us    2869                  * free our allocation and use this one.
3860                  */                              2870                  */
3861                 if ((count & ~COUNT_CONTINUED    2871                 if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)
3862                         goto out_unlock_cont; !! 2872                         goto out;
3863         }                                        2873         }
3864                                                  2874 
3865         list_add_tail(&page->lru, &head->lru)    2875         list_add_tail(&page->lru, &head->lru);
3866         page = NULL;                    /* no    2876         page = NULL;                    /* now it's attached, don't free it */
3867 out_unlock_cont:                              << 
3868         spin_unlock(&si->cont_lock);          << 
3869 out:                                             2877 out:
3870         unlock_cluster(ci);                   << 
3871         spin_unlock(&si->lock);                  2878         spin_unlock(&si->lock);
3872         put_swap_device(si);                  << 
3873 outer:                                           2879 outer:
3874         if (page)                                2880         if (page)
3875                 __free_page(page);               2881                 __free_page(page);
3876         return ret;                           !! 2882         return 0;
3877 }                                                2883 }
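
The "digit" metaphor above can be made concrete. The swap_map byte is a base-63 low digit (values 0..SWAP_MAP_MAX), and the byte at the same offset in each successive continuation page is a base-128 digit (values 0..SWAP_CONT_MAX), so one continuation page already extends the per-entry count from 62 to 62 + 63 * 127 = 8063. The stand-alone sketch below models the carry arithmetic with an array in place of the page list; it omits the COUNT_CONTINUED flag bookkeeping that the kernel keeps per byte.

    #include <stdio.h>

    #define SWAP_MAP_MAX  0x3e      /* 62: largest count held in swap_map */
    #define SWAP_CONT_MAX 0x7f      /* 127: largest continuation digit */
    #define NDIGITS       3         /* digits[1..2] model two continuation pages */

    /*
     * digits[0] models the swap_map count; digits[i] models the byte at the
     * same page offset in the i-th continuation page.  Returns 0 when another
     * continuation "page" (array slot) would have to be allocated.
     */
    static int count_inc(unsigned char *digits)
    {
            int i;

            if (digits[0] < SWAP_MAP_MAX) {
                    digits[0]++;
                    return 1;
            }
            digits[0] = 0;                          /* carry out of the low digit */
            for (i = 1; i < NDIGITS; i++) {
                    if (digits[i] < SWAP_CONT_MAX) {
                            digits[i]++;            /* think of adding 1 to 999 */
                            return 1;
                    }
                    digits[i] = 0;                  /* carry into the next page */
            }
            return 0;           /* would need add_swap_count_continuation() */
    }

    int main(void)
    {
            unsigned char digits[NDIGITS] = { 0 };
            unsigned long n = 0;

            while (count_inc(digits))
                    n++;
            /* expect 62 + 63 * (128 * 128 - 1) = 1032191 */
            printf("max count with %d continuation pages: %lu\n", NDIGITS - 1, n);
            return 0;
    }
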
3878                                                  2884 
3879 /*                                               2885 /*
3880  * swap_count_continued - when the original s    2886  * swap_count_continued - when the original swap_map count is incremented
3881  * from SWAP_MAP_MAX, check if there is alrea    2887  * from SWAP_MAP_MAX, check if there is already a continuation page to carry
3882  * into, carry if so, or else fail until a ne    2888  * into, carry if so, or else fail until a new continuation page is allocated;
3883  * when the original swap_map count is decrem    2889  * when the original swap_map count is decremented from 0 with continuation,
3884  * borrow from the continuation and report wh    2890  * borrow from the continuation and report whether it still holds more.
3885  * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster  !! 2891  * Called while __swap_duplicate() or swap_entry_free() holds swap_lock.
3886  * lock.                                      << 
3887  */                                              2892  */
3888 static bool swap_count_continued(struct swap_    2893 static bool swap_count_continued(struct swap_info_struct *si,
3889                                  pgoff_t offs    2894                                  pgoff_t offset, unsigned char count)
3890 {                                                2895 {
3891         struct page *head;                       2896         struct page *head;
3892         struct page *page;                       2897         struct page *page;
3893         unsigned char *map;                      2898         unsigned char *map;
3894         bool ret;                             << 
3895                                                  2899 
3896         head = vmalloc_to_page(si->swap_map +    2900         head = vmalloc_to_page(si->swap_map + offset);
3897         if (page_private(head) != SWP_CONTINU    2901         if (page_private(head) != SWP_CONTINUED) {
3898                 BUG_ON(count & COUNT_CONTINUE    2902                 BUG_ON(count & COUNT_CONTINUED);
3899                 return false;           /* ne    2903                 return false;           /* need to add count continuation */
3900         }                                        2904         }
3901                                                  2905 
3902         spin_lock(&si->cont_lock);            << 
3903         offset &= ~PAGE_MASK;                    2906         offset &= ~PAGE_MASK;
3904         page = list_next_entry(head, lru);    !! 2907         page = list_entry(head->lru.next, struct page, lru);
3905         map = kmap_local_page(page) + offset; !! 2908         map = kmap_atomic(page) + offset;
3906                                                  2909 
3907         if (count == SWAP_MAP_MAX)      /* in    2910         if (count == SWAP_MAP_MAX)      /* initial increment from swap_map */
3908                 goto init_map;          /* ju    2911                 goto init_map;          /* jump over SWAP_CONT_MAX checks */
3909                                                  2912 
3910         if (count == (SWAP_MAP_MAX | COUNT_CO    2913         if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) { /* incrementing */
3911                 /*                               2914                 /*
3912                  * Think of how you add 1 to     2915                  * Think of how you add 1 to 999
3913                  */                              2916                  */
3914                 while (*map == (SWAP_CONT_MAX    2917                 while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) {
3915                         kunmap_local(map);    !! 2918                         kunmap_atomic(map);
3916                         page = list_next_entry(page, lru);  !! 2919                         page = list_entry(page->lru.next, struct page, lru);
3917                         BUG_ON(page == head);    2920                         BUG_ON(page == head);
3918                         map = kmap_local_page(page) + offset;  !! 2921                         map = kmap_atomic(page) + offset;
3919                 }                                2922                 }
3920                 if (*map == SWAP_CONT_MAX) {     2923                 if (*map == SWAP_CONT_MAX) {
3921                         kunmap_local(map);    !! 2924                         kunmap_atomic(map);
3922                         page = list_next_entry(page, lru);  !! 2925                         page = list_entry(page->lru.next, struct page, lru);
3923                         if (page == head) {   !! 2926                         if (page == head)
3924                                 ret = false;    /* add count continuation */  !! 2927                                 return false;   /* add count continuation */
3925                                 goto out;     !! 2928                         map = kmap_atomic(page) + offset;
3926                         }                     << 
3927                         map = kmap_local_page(page) + offset;  << 
3928 init_map:               *map = 0;                2929 init_map:               *map = 0;               /* we didn't zero the page */
3929                 }                                2930                 }
3930                 *map += 1;                       2931                 *map += 1;
3931                 kunmap_local(map);            !! 2932                 kunmap_atomic(map);
3932                 while ((page = list_prev_entry(page, lru)) != head) {  !! 2933                 page = list_entry(page->lru.prev, struct page, lru);
3933                         map = kmap_local_page(page) + offset;          !! 2934                 while (page != head) {
                                                   >> 2935                         map = kmap_atomic(page) + offset;
3934                         *map = COUNT_CONTINUE    2936                         *map = COUNT_CONTINUED;
3935                         kunmap_local(map);    !! 2937                         kunmap_atomic(map);
                                                   >> 2938                         page = list_entry(page->lru.prev, struct page, lru);
3936                 }                                2939                 }
3937                 ret = true;                     /* incremented */  !! 2940                 return true;                    /* incremented */
3938                                                  2941 
3939         } else {                                 2942         } else {                                /* decrementing */
3940                 /*                               2943                 /*
3941                  * Think of how you subtract     2944                  * Think of how you subtract 1 from 1000
3942                  */                              2945                  */
3943                 BUG_ON(count != COUNT_CONTINU    2946                 BUG_ON(count != COUNT_CONTINUED);
3944                 while (*map == COUNT_CONTINUE    2947                 while (*map == COUNT_CONTINUED) {
3945                         kunmap_local(map);    !! 2948                         kunmap_atomic(map);
3946                         page = list_next_entry(page, lru);  !! 2949                         page = list_entry(page->lru.next, struct page, lru);
3947                         BUG_ON(page == head);    2950                         BUG_ON(page == head);
3948                         map = kmap_local_page(page) + offset;  !! 2951                         map = kmap_atomic(page) + offset;
3949                 }                                2952                 }
3950                 BUG_ON(*map == 0);               2953                 BUG_ON(*map == 0);
3951                 *map -= 1;                       2954                 *map -= 1;
3952                 if (*map == 0)                   2955                 if (*map == 0)
3953                         count = 0;               2956                         count = 0;
3954                 kunmap_local(map);            !! 2957                 kunmap_atomic(map);
3955                 while ((page = list_prev_entry(page, lru)) != head) {  !! 2958                 page = list_entry(page->lru.prev, struct page, lru);
3956                         map = kmap_local_page(page) + offset;          !! 2959                 while (page != head) {
                                                   >> 2960                         map = kmap_atomic(page) + offset;
3957                         *map = SWAP_CONT_MAX     2961                         *map = SWAP_CONT_MAX | count;
3958                         count = COUNT_CONTINU    2962                         count = COUNT_CONTINUED;
3959                         kunmap_local(map);    !! 2963                         kunmap_atomic(map);
                                                   >> 2964                         page = list_entry(page->lru.prev, struct page, lru);
3960                 }                                2965                 }
3961                 ret = count == COUNT_CONTINUED;  !! 2966                 return count == COUNT_CONTINUED;
3962         }                                        2967         }
3963 out:                                          << 
3964         spin_unlock(&si->cont_lock);          << 
3965         return ret;                           << 
3966 }                                                2968 }
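
The decrement half is the mirror image: when the low digit wraps, the function borrows from the lowest non-zero continuation digit ("subtract 1 from 1000") and refills the skipped digits with SWAP_CONT_MAX on the walk back; the return value tells the caller whether the continuation digits still hold anything, i.e. whether swap_map should keep COUNT_CONTINUED. A companion to the count_inc() sketch above, with the same constants and array convention:

    /* Borrow one unit; returns 0 if the whole count was already zero. */
    static int count_dec(unsigned char *digits)
    {
            int i;

            if (digits[0] > 0) {
                    digits[0]--;
                    return 1;
            }
            /* find the lowest continuation digit we can borrow from */
            for (i = 1; i < NDIGITS && digits[i] == 0; i++)
                    ;
            if (i == NDIGITS)
                    return 0;                       /* nothing left to borrow */
            digits[i]--;                            /* think of 1000 - 1 */
            while (--i >= 1)
                    digits[i] = SWAP_CONT_MAX;      /* refill on the walk back */
            digits[0] = SWAP_MAP_MAX;               /* low digit wraps to 62 */
            return 1;
    }
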
3967                                                  2969 
3968 /*                                               2970 /*
3969  * free_swap_count_continuations - swapoff free all the continuation pages      2971  * free_swap_count_continuations - swapoff free all the continuation pages
3970  * appended to the swap_map, after swap_map is quiesced, before vfree'ing it.   2972  * appended to the swap_map, after swap_map is quiesced, before vfree'ing it.
3971  */                                              2973  */
3972 static void free_swap_count_continuations(str    2974 static void free_swap_count_continuations(struct swap_info_struct *si)
3973 {                                                2975 {
3974         pgoff_t offset;                          2976         pgoff_t offset;
3975                                                  2977 
3976         for (offset = 0; offset < si->max; of    2978         for (offset = 0; offset < si->max; offset += PAGE_SIZE) {
3977                 struct page *head;               2979                 struct page *head;
3978                 head = vmalloc_to_page(si->sw    2980                 head = vmalloc_to_page(si->swap_map + offset);
3979                 if (page_private(head)) {        2981                 if (page_private(head)) {
3980                         struct page *page, *next;  !! 2982                         struct list_head *this, *next;
3981                                               !! 2983                         list_for_each_safe(this, next, &head->lru) {
3982                         list_for_each_entry_safe(page, next, &head->lru, lru) {  !! 2984                                 struct page *page;
3983                                 list_del(&page->lru);                            !! 2985                                 page = list_entry(this, struct page, lru);
                                                   >> 2986                                 list_del(this);
3984                                 __free_page(p    2987                                 __free_page(page);
3985                         }                        2988                         }
3986                 }                                2989                 }
3987         }                                        2990         }
3988 }                                                2991 }
3989                                               << 
3990 #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)         << 
3991 void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp)  << 
3992 {                                             << 
3993         struct swap_info_struct *si, *next;   << 
3994         int nid = folio_nid(folio);           << 
3995                                               << 
3996         if (!(gfp & __GFP_IO))                << 
3997                 return;                       << 
3998                                               << 
3999         if (!__has_usable_swap())             << 
4000                 return;                       << 
4001                                               << 
4002         if (!blk_cgroup_congested())          << 
4003                 return;                       << 
4004                                               << 
4005         /*                                    << 
4006          * We've already scheduled a throttle, avoid taking the global swap  << 
4007          * lock.                              << 
4008          */                                   << 
4009         if (current->throttle_disk)           << 
4010                 return;                       << 
4011                                               << 
4012         spin_lock(&swap_avail_lock);          << 
4013         plist_for_each_entry_safe(si, next, &swap_avail_heads[nid],  << 
4014                                   avail_lists[nid]) {                << 
4015                 if (si->bdev) {               << 
4016                         blkcg_schedule_throttle(si->bdev->bd_disk, true);  << 
4017                         break;                << 
4018                 }                             << 
4019         }                                     << 
4020         spin_unlock(&swap_avail_lock);        << 
4021 }                                             << 
4022 #endif                                        << 
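
Note the ordering of the early returns above: the cheap, lockless tests (__GFP_IO in the mask, usable swap, blk-cgroup congestion, a throttle already scheduled) all run before swap_avail_lock is taken. Callers do not use this double-underscore helper directly; they go through the folio_throttle_swaprate() wrapper from <linux/swap.h>, roughly like this at a site about to dirty a freshly allocated folio (illustrative call site, not a specific kernel path):

    folio_throttle_swaprate(folio, GFP_KERNEL); /* schedules writeback throttling
                                                   if the swap device is congested */
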
4023                                               << 
4024 static int __init swapfile_init(void)         << 
4025 {                                             << 
4026         int nid;                              << 
4027                                               << 
4028         swap_avail_heads = kmalloc_array(nr_node_ids, sizeof(struct plist_head),  << 
4029                                          GFP_KERNEL);                              << 
4030         if (!swap_avail_heads) {              << 
4031                 pr_emerg("Not enough memory for swap heads, swap is disabled\n");  << 
4032                 return -ENOMEM;               << 
4033         }                                     << 
4034                                               << 
4035         for_each_node(nid)                    << 
4036                 plist_head_init(&swap_avail_heads[nid]);  << 
4037                                               << 
4038         swapfile_maximum_size = arch_max_swapfile_size();  << 
4039                                               << 
4040 #ifdef CONFIG_MIGRATION                       << 
4041         if (swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS))  << 
4042                 swap_migration_ad_supported = true;                << 
4043 #endif  /* CONFIG_MIGRATION */                << 
4044                                               << 
4045         return 0;                             << 
4046 }                                             << 
4047 subsys_initcall(swapfile_init);               << 
4048                                                  2992 
