TOMOYO Linux Cross Reference
Linux/fs/f2fs/segment.c

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * fs/f2fs/segment.c
  4  *
  5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  6  *             http://www.samsung.com/
  7  */
  8 #include <linux/fs.h>
  9 #include <linux/f2fs_fs.h>
 10 #include <linux/bio.h>
 11 #include <linux/blkdev.h>
 12 #include <linux/sched/mm.h>
 13 #include <linux/prefetch.h>
 14 #include <linux/kthread.h>
 15 #include <linux/swap.h>
 16 #include <linux/timer.h>
 17 #include <linux/freezer.h>
 18 #include <linux/sched/signal.h>
 19 #include <linux/random.h>
 20 
 21 #include "f2fs.h"
 22 #include "segment.h"
 23 #include "node.h"
 24 #include "gc.h"
 25 #include "iostat.h"
 26 #include <trace/events/f2fs.h>
 27 
 28 #define __reverse_ffz(x) __reverse_ffs(~(x))
 29 
 30 static struct kmem_cache *discard_entry_slab;
 31 static struct kmem_cache *discard_cmd_slab;
 32 static struct kmem_cache *sit_entry_set_slab;
 33 static struct kmem_cache *revoke_entry_slab;
 34 
 35 static unsigned long __reverse_ulong(unsigned char *str)
 36 {
 37         unsigned long tmp = 0;
 38         int shift = 24, idx = 0;
 39 
 40 #if BITS_PER_LONG == 64
 41         shift = 56;
 42 #endif
 43         while (shift >= 0) {
 44                 tmp |= (unsigned long)str[idx++] << shift;
 45                 shift -= BITS_PER_BYTE;
 46         }
 47         return tmp;
 48 }
 49 
 50 /*
 51  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 52  * MSB and LSB are reversed in a byte by f2fs_set_bit.
 53  */
 54 static inline unsigned long __reverse_ffs(unsigned long word)
 55 {
 56         int num = 0;
 57 
 58 #if BITS_PER_LONG == 64
 59         if ((word & 0xffffffff00000000UL) == 0)
 60                 num += 32;
 61         else
 62                 word >>= 32;
 63 #endif
 64         if ((word & 0xffff0000) == 0)
 65                 num += 16;
 66         else
 67                 word >>= 16;
 68 
 69         if ((word & 0xff00) == 0)
 70                 num += 8;
 71         else
 72                 word >>= 8;
 73 
 74         if ((word & 0xf0) == 0)
 75                 num += 4;
 76         else
 77                 word >>= 4;
 78 
 79         if ((word & 0xc) == 0)
 80                 num += 2;
 81         else
 82                 word >>= 2;
 83 
 84         if ((word & 0x2) == 0)
 85                 num += 1;
 86         return num;
 87 }
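
/*
 * Example of the reversed ordering: f2fs_set_bit(0, bitmap) sets
 * bitmap[0] to 0x80.  __reverse_ulong() loads byte 0 into the most
 * significant byte of the word, so that bit becomes the MSB and
 * __reverse_ffs() returns 0, the original f2fs bit index within the word.
 */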
 88 
 89 /*
 90  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 91  * f2fs_set_bit makes MSB and LSB reversed in a byte.
 92  * @size must be an integral multiple of BITS_PER_LONG.
 93  * Example:
 94  *                             MSB <--> LSB
 95  *   f2fs_set_bit(0, bitmap) => 1000 0000
 96  *   f2fs_set_bit(7, bitmap) => 0000 0001
 97  */
 98 static unsigned long __find_rev_next_bit(const unsigned long *addr,
 99                         unsigned long size, unsigned long offset)
100 {
101         const unsigned long *p = addr + BIT_WORD(offset);
102         unsigned long result = size;
103         unsigned long tmp;
104 
105         if (offset >= size)
106                 return size;
107 
108         size -= (offset & ~(BITS_PER_LONG - 1));
109         offset %= BITS_PER_LONG;
110 
111         while (1) {
112                 if (*p == 0)
113                         goto pass;
114 
115                 tmp = __reverse_ulong((unsigned char *)p);
116 
117                 tmp &= ~0UL >> offset;
118                 if (size < BITS_PER_LONG)
119                         tmp &= (~0UL << (BITS_PER_LONG - size));
120                 if (tmp)
121                         goto found;
122 pass:
123                 if (size <= BITS_PER_LONG)
124                         break;
125                 size -= BITS_PER_LONG;
126                 offset = 0;
127                 p++;
128         }
129         return result;
130 found:
131         return result - size + __reverse_ffs(tmp);
132 }
133 
134 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
135                         unsigned long size, unsigned long offset)
136 {
137         const unsigned long *p = addr + BIT_WORD(offset);
138         unsigned long result = size;
139         unsigned long tmp;
140 
141         if (offset >= size)
142                 return size;
143 
144         size -= (offset & ~(BITS_PER_LONG - 1));
145         offset %= BITS_PER_LONG;
146 
147         while (1) {
148                 if (*p == ~0UL)
149                         goto pass;
150 
151                 tmp = __reverse_ulong((unsigned char *)p);
152 
153                 if (offset)
154                         tmp |= ~0UL << (BITS_PER_LONG - offset);
155                 if (size < BITS_PER_LONG)
156                         tmp |= ~0UL >> size;
157                 if (tmp != ~0UL)
158                         goto found;
159 pass:
160                 if (size <= BITS_PER_LONG)
161                         break;
162                 size -= BITS_PER_LONG;
163                 offset = 0;
164                 p++;
165         }
166         return result;
167 found:
168         return result - size + __reverse_ffz(tmp);
169 }
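
/*
 * Example: with bitmap bytes { 0xc0, 0x00, ... } (f2fs bits 0 and 1 set),
 * __find_rev_next_bit(map, 64, 1) returns 1 and
 * __find_rev_next_zero_bit(map, 64, 0) returns 2.
 */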
170 
171 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
172 {
173         int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
174         int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
175         int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
176 
177         if (f2fs_lfs_mode(sbi))
178                 return false;
179         if (sbi->gc_mode == GC_URGENT_HIGH)
180                 return true;
181         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
182                 return true;
183 
184         return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
185                         SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
186 }
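
/*
 * Rough illustration with made-up numbers: given 3 dirty node sections,
 * 2 dirty dentry sections, 1 dirty imeta section, min_ssr_sections == 0
 * and 2 reserved sections, f2fs_need_SSR() returns true once
 * free_sections() drops to 3 + 2 * 2 + 1 + 0 + 2 = 10 or below.
 */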
187 
188 void f2fs_abort_atomic_write(struct inode *inode, bool clean)
189 {
190         struct f2fs_inode_info *fi = F2FS_I(inode);
191 
192         if (!f2fs_is_atomic_file(inode))
193                 return;
194 
195         if (clean)
196                 truncate_inode_pages_final(inode->i_mapping);
197 
198         release_atomic_write_cnt(inode);
199         clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
200         clear_inode_flag(inode, FI_ATOMIC_REPLACE);
201         clear_inode_flag(inode, FI_ATOMIC_FILE);
202         if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) {
203                 clear_inode_flag(inode, FI_ATOMIC_DIRTIED);
204                 f2fs_mark_inode_dirty_sync(inode, true);
205         }
206         stat_dec_atomic_inode(inode);
207 
208         F2FS_I(inode)->atomic_write_task = NULL;
209 
210         if (clean) {
211                 f2fs_i_size_write(inode, fi->original_i_size);
212                 fi->original_i_size = 0;
213         }
214         /* avoid stale dirty inode during eviction */
215         sync_inode_metadata(inode, 0);
216 }
217 
218 static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
219                         block_t new_addr, block_t *old_addr, bool recover)
220 {
221         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
222         struct dnode_of_data dn;
223         struct node_info ni;
224         int err;
225 
226 retry:
227         set_new_dnode(&dn, inode, NULL, NULL, 0);
228         err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
229         if (err) {
230                 if (err == -ENOMEM) {
231                         f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
232                         goto retry;
233                 }
234                 return err;
235         }
236 
237         err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
238         if (err) {
239                 f2fs_put_dnode(&dn);
240                 return err;
241         }
242 
243         if (recover) {
244                 /* dn.data_blkaddr is always valid */
245                 if (!__is_valid_data_blkaddr(new_addr)) {
246                         if (new_addr == NULL_ADDR)
247                                 dec_valid_block_count(sbi, inode, 1);
248                         f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
249                         f2fs_update_data_blkaddr(&dn, new_addr);
250                 } else {
251                         f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
252                                 new_addr, ni.version, true, true);
253                 }
254         } else {
255                 blkcnt_t count = 1;
256 
257                 err = inc_valid_block_count(sbi, inode, &count, true);
258                 if (err) {
259                         f2fs_put_dnode(&dn);
260                         return err;
261                 }
262 
263                 *old_addr = dn.data_blkaddr;
264                 f2fs_truncate_data_blocks_range(&dn, 1);
265                 dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);
266 
267                 f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
268                                         ni.version, true, false);
269         }
270 
271         f2fs_put_dnode(&dn);
272 
273         trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
274                         index, old_addr ? *old_addr : 0, new_addr, recover);
275         return 0;
276 }
277 
278 static void __complete_revoke_list(struct inode *inode, struct list_head *head,
279                                         bool revoke)
280 {
281         struct revoke_entry *cur, *tmp;
282         pgoff_t start_index = 0;
283         bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
284 
285         list_for_each_entry_safe(cur, tmp, head, list) {
286                 if (revoke) {
287                         __replace_atomic_write_block(inode, cur->index,
288                                                 cur->old_addr, NULL, true);
289                 } else if (truncate) {
290                         f2fs_truncate_hole(inode, start_index, cur->index);
291                         start_index = cur->index + 1;
292                 }
293 
294                 list_del(&cur->list);
295                 kmem_cache_free(revoke_entry_slab, cur);
296         }
297 
298         if (!revoke && truncate)
299                 f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
300 }
301 
302 static int __f2fs_commit_atomic_write(struct inode *inode)
303 {
304         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
305         struct f2fs_inode_info *fi = F2FS_I(inode);
306         struct inode *cow_inode = fi->cow_inode;
307         struct revoke_entry *new;
308         struct list_head revoke_list;
309         block_t blkaddr;
310         struct dnode_of_data dn;
311         pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
312         pgoff_t off = 0, blen, index;
313         int ret = 0, i;
314 
315         INIT_LIST_HEAD(&revoke_list);
316 
317         while (len) {
318                 blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);
319 
320                 set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
321                 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
322                 if (ret && ret != -ENOENT) {
323                         goto out;
324                 } else if (ret == -ENOENT) {
325                         ret = 0;
326                         if (dn.max_level == 0)
327                                 goto out;
328                         goto next;
329                 }
330 
331                 blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode),
332                                 len);
333                 index = off;
334                 for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
335                         blkaddr = f2fs_data_blkaddr(&dn);
336 
337                         if (!__is_valid_data_blkaddr(blkaddr)) {
338                                 continue;
339                         } else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
340                                         DATA_GENERIC_ENHANCE)) {
341                                 f2fs_put_dnode(&dn);
342                                 ret = -EFSCORRUPTED;
343                                 goto out;
344                         }
345 
346                         new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
347                                                         true, NULL);
348 
349                         ret = __replace_atomic_write_block(inode, index, blkaddr,
350                                                         &new->old_addr, false);
351                         if (ret) {
352                                 f2fs_put_dnode(&dn);
353                                 kmem_cache_free(revoke_entry_slab, new);
354                                 goto out;
355                         }
356 
357                         f2fs_update_data_blkaddr(&dn, NULL_ADDR);
358                         new->index = index;
359                         list_add_tail(&new->list, &revoke_list);
360                 }
361                 f2fs_put_dnode(&dn);
362 next:
363                 off += blen;
364                 len -= blen;
365         }
366 
367 out:
368         if (ret) {
369                 sbi->revoked_atomic_block += fi->atomic_write_cnt;
370         } else {
371                 sbi->committed_atomic_block += fi->atomic_write_cnt;
372                 set_inode_flag(inode, FI_ATOMIC_COMMITTED);
373                 if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) {
374                         clear_inode_flag(inode, FI_ATOMIC_DIRTIED);
375                         f2fs_mark_inode_dirty_sync(inode, true);
376                 }
377         }
378 
379         __complete_revoke_list(inode, &revoke_list, ret ? true : false);
380 
381         return ret;
382 }
383 
384 int f2fs_commit_atomic_write(struct inode *inode)
385 {
386         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
387         struct f2fs_inode_info *fi = F2FS_I(inode);
388         int err;
389 
390         err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
391         if (err)
392                 return err;
393 
394         f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
395         f2fs_lock_op(sbi);
396 
397         err = __f2fs_commit_atomic_write(inode);
398 
399         f2fs_unlock_op(sbi);
400         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
401 
402         return err;
403 }
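
/*
 * Illustration of the commit path: __f2fs_commit_atomic_write() walks the
 * COW inode, moves each valid COW block into the original file via
 * __replace_atomic_write_block() and records the displaced address in
 * revoke_list; if any step fails, the list is replayed with recover ==
 * true so the original block addresses are restored.
 */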
404 
405 /*
406  * This function balances dirty node and dentry pages.
407  * In addition, it controls garbage collection.
408  */
409 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
410 {
411         if (f2fs_cp_error(sbi))
412                 return;
413 
414         if (time_to_inject(sbi, FAULT_CHECKPOINT))
415                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
416 
417         /* balance_fs_bg may still be pending */
418         if (need && excess_cached_nats(sbi))
419                 f2fs_balance_fs_bg(sbi, false);
420 
421         if (!f2fs_is_checkpoint_ready(sbi))
422                 return;
423 
424         /*
425          * If there are too many dirty dir/node pages without enough free
426          * segments, we should do GC or end up writing a checkpoint.
427          */
428         if (has_enough_free_secs(sbi, 0, 0))
429                 return;
430 
431         if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
432                                 sbi->gc_thread->f2fs_gc_task) {
433                 DEFINE_WAIT(wait);
434 
435                 prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
436                                         TASK_UNINTERRUPTIBLE);
437                 wake_up(&sbi->gc_thread->gc_wait_queue_head);
438                 io_schedule();
439                 finish_wait(&sbi->gc_thread->fggc_wq, &wait);
440         } else {
441                 struct f2fs_gc_control gc_control = {
442                         .victim_segno = NULL_SEGNO,
443                         .init_gc_type = BG_GC,
444                         .no_bg_gc = true,
445                         .should_migrate_blocks = false,
446                         .err_gc_skipped = false,
447                         .nr_free_secs = 1 };
448                 f2fs_down_write(&sbi->gc_lock);
449                 stat_inc_gc_call_count(sbi, FOREGROUND);
450                 f2fs_gc(sbi, &gc_control);
451         }
452 }
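
/*
 * Illustration: when free sections run short and GC_MERGE is enabled with
 * a live background GC thread, the caller wakes that thread and sleeps on
 * fggc_wq until a round completes; otherwise it takes gc_lock and runs a
 * single foreground GC pass itself (nr_free_secs == 1).
 */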
453 
454 static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
455 {
456         int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
457         unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
458         unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
459         unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
460         unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
461         unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
462         unsigned int threshold =
463                 SEGS_TO_BLKS(sbi, (factor * DEFAULT_DIRTY_THRESHOLD));
464         unsigned int global_threshold = threshold * 3 / 2;
465 
466         if (dents >= threshold || qdata >= threshold ||
467                 nodes >= threshold || meta >= threshold ||
468                 imeta >= threshold)
469                 return true;
470         return dents + qdata + nodes + meta + imeta >  global_threshold;
471 }
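
/*
 * In other words: each dirty-page type is compared against
 * factor * DEFAULT_DIRTY_THRESHOLD segments' worth of blocks (factor is 3
 * when cp_rwsem is locked, 2 otherwise), and the sum of all types against
 * 1.5 times that per-type threshold.
 */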
472 
473 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
474 {
475         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
476                 return;
477 
478         /* try to shrink extent cache when there is not enough memory */
479         if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
480                 f2fs_shrink_read_extent_tree(sbi,
481                                 READ_EXTENT_CACHE_SHRINK_NUMBER);
482 
483         /* try to shrink age extent cache when there is not enough memory */
484         if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
485                 f2fs_shrink_age_extent_tree(sbi,
486                                 AGE_EXTENT_CACHE_SHRINK_NUMBER);
487 
488         /* check the # of cached NAT entries */
489         if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
490                 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
491 
492         if (!f2fs_available_free_memory(sbi, FREE_NIDS))
493                 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
494         else
495                 f2fs_build_free_nids(sbi, false, false);
496 
497         if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
498                 excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
499                 goto do_sync;
500 
501         /* there is in-flight IO or there was a recent foreground operation */
502         if (is_inflight_io(sbi, REQ_TIME) ||
503                 (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
504                 return;
505 
506         /* the periodic checkpoint timeout has been exceeded */
507         if (f2fs_time_over(sbi, CP_TIME))
508                 goto do_sync;
509 
510         /* checkpoint is the only way to shrink partial cached entries */
511         if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
512                 f2fs_available_free_memory(sbi, INO_ENTRIES))
513                 return;
514 
515 do_sync:
516         if (test_opt(sbi, DATA_FLUSH) && from_bg) {
517                 struct blk_plug plug;
518 
519                 mutex_lock(&sbi->flush_lock);
520 
521                 blk_start_plug(&plug);
522                 f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
523                 blk_finish_plug(&plug);
524 
525                 mutex_unlock(&sbi->flush_lock);
526         }
527         stat_inc_cp_call_count(sbi, BACKGROUND);
528         f2fs_sync_fs(sbi->sb, 1);
529 }
530 
531 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
532                                 struct block_device *bdev)
533 {
534         int ret = blkdev_issue_flush(bdev);
535 
536         trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
537                                 test_opt(sbi, FLUSH_MERGE), ret);
538         if (!ret)
539                 f2fs_update_iostat(sbi, NULL, FS_FLUSH_IO, 0);
540         return ret;
541 }
542 
543 static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
544 {
545         int ret = 0;
546         int i;
547 
548         if (!f2fs_is_multi_device(sbi))
549                 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
550 
551         for (i = 0; i < sbi->s_ndevs; i++) {
552                 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
553                         continue;
554                 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
555                 if (ret)
556                         break;
557         }
558         return ret;
559 }
560 
561 static int issue_flush_thread(void *data)
562 {
563         struct f2fs_sb_info *sbi = data;
564         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
565         wait_queue_head_t *q = &fcc->flush_wait_queue;
566 repeat:
567         if (kthread_should_stop())
568                 return 0;
569 
570         if (!llist_empty(&fcc->issue_list)) {
571                 struct flush_cmd *cmd, *next;
572                 int ret;
573 
574                 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
575                 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
576 
577                 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
578 
579                 ret = submit_flush_wait(sbi, cmd->ino);
580                 atomic_inc(&fcc->issued_flush);
581 
582                 llist_for_each_entry_safe(cmd, next,
583                                           fcc->dispatch_list, llnode) {
584                         cmd->ret = ret;
585                         complete(&cmd->wait);
586                 }
587                 fcc->dispatch_list = NULL;
588         }
589 
590         wait_event_interruptible(*q,
591                 kthread_should_stop() || !llist_empty(&fcc->issue_list));
592         goto repeat;
593 }
594 
595 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
596 {
597         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
598         struct flush_cmd cmd;
599         int ret;
600 
601         if (test_opt(sbi, NOBARRIER))
602                 return 0;
603 
604         if (!test_opt(sbi, FLUSH_MERGE)) {
605                 atomic_inc(&fcc->queued_flush);
606                 ret = submit_flush_wait(sbi, ino);
607                 atomic_dec(&fcc->queued_flush);
608                 atomic_inc(&fcc->issued_flush);
609                 return ret;
610         }
611 
612         if (atomic_inc_return(&fcc->queued_flush) == 1 ||
613             f2fs_is_multi_device(sbi)) {
614                 ret = submit_flush_wait(sbi, ino);
615                 atomic_dec(&fcc->queued_flush);
616 
617                 atomic_inc(&fcc->issued_flush);
618                 return ret;
619         }
620 
621         cmd.ino = ino;
622         init_completion(&cmd.wait);
623 
624         llist_add(&cmd.llnode, &fcc->issue_list);
625 
626         /*
627          * update issue_list before we wake up issue_flush thread, this
628          * smp_mb() pairs with another barrier in ___wait_event(), see
629          * more details in comments of waitqueue_active().
630          */
631         smp_mb();
632 
633         if (waitqueue_active(&fcc->flush_wait_queue))
634                 wake_up(&fcc->flush_wait_queue);
635 
636         if (fcc->f2fs_issue_flush) {
637                 wait_for_completion(&cmd.wait);
638                 atomic_dec(&fcc->queued_flush);
639         } else {
640                 struct llist_node *list;
641 
642                 list = llist_del_all(&fcc->issue_list);
643                 if (!list) {
644                         wait_for_completion(&cmd.wait);
645                         atomic_dec(&fcc->queued_flush);
646                 } else {
647                         struct flush_cmd *tmp, *next;
648 
649                         ret = submit_flush_wait(sbi, ino);
650 
651                         llist_for_each_entry_safe(tmp, next, list, llnode) {
652                                 if (tmp == &cmd) {
653                                         cmd.ret = ret;
654                                         atomic_dec(&fcc->queued_flush);
655                                         continue;
656                                 }
657                                 tmp->ret = ret;
658                                 complete(&tmp->wait);
659                         }
660                 }
661         }
662 
663         return cmd.ret;
664 }
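
/*
 * Illustration of FLUSH_MERGE: the first caller (queued_flush becomes 1)
 * and any caller on a multi-device fs issues the flush directly; later
 * callers queue a flush_cmd on fcc->issue_list and sleep.  The flush
 * thread, or if it has already exited whichever waiter wins
 * llist_del_all(), issues one flush and completes every queued command
 * with the shared return code.
 */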
665 
666 int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
667 {
668         dev_t dev = sbi->sb->s_bdev->bd_dev;
669         struct flush_cmd_control *fcc;
670 
671         if (SM_I(sbi)->fcc_info) {
672                 fcc = SM_I(sbi)->fcc_info;
673                 if (fcc->f2fs_issue_flush)
674                         return 0;
675                 goto init_thread;
676         }
677 
678         fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
679         if (!fcc)
680                 return -ENOMEM;
681         atomic_set(&fcc->issued_flush, 0);
682         atomic_set(&fcc->queued_flush, 0);
683         init_waitqueue_head(&fcc->flush_wait_queue);
684         init_llist_head(&fcc->issue_list);
685         SM_I(sbi)->fcc_info = fcc;
686         if (!test_opt(sbi, FLUSH_MERGE))
687                 return 0;
688 
689 init_thread:
690         fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
691                                 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
692         if (IS_ERR(fcc->f2fs_issue_flush)) {
693                 int err = PTR_ERR(fcc->f2fs_issue_flush);
694 
695                 fcc->f2fs_issue_flush = NULL;
696                 return err;
697         }
698 
699         return 0;
700 }
701 
702 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
703 {
704         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
705 
706         if (fcc && fcc->f2fs_issue_flush) {
707                 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
708 
709                 fcc->f2fs_issue_flush = NULL;
710                 kthread_stop(flush_thread);
711         }
712         if (free) {
713                 kfree(fcc);
714                 SM_I(sbi)->fcc_info = NULL;
715         }
716 }
717 
718 int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
719 {
720         int ret = 0, i;
721 
722         if (!f2fs_is_multi_device(sbi))
723                 return 0;
724 
725         if (test_opt(sbi, NOBARRIER))
726                 return 0;
727 
728         for (i = 1; i < sbi->s_ndevs; i++) {
729                 int count = DEFAULT_RETRY_IO_COUNT;
730 
731                 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
732                         continue;
733 
734                 do {
735                         ret = __submit_flush_wait(sbi, FDEV(i).bdev);
736                         if (ret)
737                                 f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
738                 } while (ret && --count);
739 
740                 if (ret) {
741                         f2fs_stop_checkpoint(sbi, false,
742                                         STOP_CP_REASON_FLUSH_FAIL);
743                         break;
744                 }
745 
746                 spin_lock(&sbi->dev_lock);
747                 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
748                 spin_unlock(&sbi->dev_lock);
749         }
750 
751         return ret;
752 }
753 
754 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
755                 enum dirty_type dirty_type)
756 {
757         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
758 
759         /* need not be added */
760         if (IS_CURSEG(sbi, segno))
761                 return;
762 
763         if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
764                 dirty_i->nr_dirty[dirty_type]++;
765 
766         if (dirty_type == DIRTY) {
767                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
768                 enum dirty_type t = sentry->type;
769 
770                 if (unlikely(t >= DIRTY)) {
771                         f2fs_bug_on(sbi, 1);
772                         return;
773                 }
774                 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
775                         dirty_i->nr_dirty[t]++;
776 
777                 if (__is_large_section(sbi)) {
778                         unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
779                         block_t valid_blocks =
780                                 get_valid_blocks(sbi, segno, true);
781 
782                         f2fs_bug_on(sbi,
783                                 (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
784                                 !valid_blocks) ||
785                                 valid_blocks == CAP_BLKS_PER_SEC(sbi));
786 
787                         if (!IS_CURSEC(sbi, secno))
788                                 set_bit(secno, dirty_i->dirty_secmap);
789                 }
790         }
791 }
792 
793 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
794                 enum dirty_type dirty_type)
795 {
796         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
797         block_t valid_blocks;
798 
799         if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
800                 dirty_i->nr_dirty[dirty_type]--;
801 
802         if (dirty_type == DIRTY) {
803                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
804                 enum dirty_type t = sentry->type;
805 
806                 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
807                         dirty_i->nr_dirty[t]--;
808 
809                 valid_blocks = get_valid_blocks(sbi, segno, true);
810                 if (valid_blocks == 0) {
811                         clear_bit(GET_SEC_FROM_SEG(sbi, segno),
812                                                 dirty_i->victim_secmap);
813 #ifdef CONFIG_F2FS_CHECK_FS
814                         clear_bit(segno, SIT_I(sbi)->invalid_segmap);
815 #endif
816                 }
817                 if (__is_large_section(sbi)) {
818                         unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
819 
820                         if (!valid_blocks ||
821                                         valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
822                                 clear_bit(secno, dirty_i->dirty_secmap);
823                                 return;
824                         }
825 
826                         if (!IS_CURSEC(sbi, secno))
827                                 set_bit(secno, dirty_i->dirty_secmap);
828                 }
829         }
830 }
831 
832 /*
833  * Errors such as -ENOMEM should not occur here.
834  * Adding a dirty entry to the seglist is not a critical operation.
835  * If the given segment is one of the current working segments, it won't be added.
836  */
837 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
838 {
839         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
840         unsigned short valid_blocks, ckpt_valid_blocks;
841         unsigned int usable_blocks;
842 
843         if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
844                 return;
845 
846         usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
847         mutex_lock(&dirty_i->seglist_lock);
848 
849         valid_blocks = get_valid_blocks(sbi, segno, false);
850         ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
851 
852         if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
853                 ckpt_valid_blocks == usable_blocks)) {
854                 __locate_dirty_segment(sbi, segno, PRE);
855                 __remove_dirty_segment(sbi, segno, DIRTY);
856         } else if (valid_blocks < usable_blocks) {
857                 __locate_dirty_segment(sbi, segno, DIRTY);
858         } else {
859                 /* Recovery routine with SSR needs this */
860                 __remove_dirty_segment(sbi, segno, DIRTY);
861         }
862 
863         mutex_unlock(&dirty_i->seglist_lock);
864 }
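
/*
 * Illustration: a segment with no valid blocks moves to the PRE list
 * (freed at the next checkpoint), a partially valid one goes on or stays
 * on the DIRTY list, and a fully valid one is dropped from DIRTY, which
 * the SSR recovery path relies on.
 */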
865 
866 /* Move dirty segments that have no valid blocks to the prefree list. */
867 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
868 {
869         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
870         unsigned int segno;
871 
872         mutex_lock(&dirty_i->seglist_lock);
873         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
874                 if (get_valid_blocks(sbi, segno, false))
875                         continue;
876                 if (IS_CURSEG(sbi, segno))
877                         continue;
878                 __locate_dirty_segment(sbi, segno, PRE);
879                 __remove_dirty_segment(sbi, segno, DIRTY);
880         }
881         mutex_unlock(&dirty_i->seglist_lock);
882 }
883 
884 block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
885 {
886         int ovp_hole_segs =
887                 (overprovision_segments(sbi) - reserved_segments(sbi));
888         block_t ovp_holes = SEGS_TO_BLKS(sbi, ovp_hole_segs);
889         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
890         block_t holes[2] = {0, 0};      /* DATA and NODE */
891         block_t unusable;
892         struct seg_entry *se;
893         unsigned int segno;
894 
895         mutex_lock(&dirty_i->seglist_lock);
896         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
897                 se = get_seg_entry(sbi, segno);
898                 if (IS_NODESEG(se->type))
899                         holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
900                                                         se->valid_blocks;
901                 else
902                         holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
903                                                         se->valid_blocks;
904         }
905         mutex_unlock(&dirty_i->seglist_lock);
906 
907         unusable = max(holes[DATA], holes[NODE]);
908         if (unusable > ovp_holes)
909                 return unusable - ovp_holes;
910         return 0;
911 }
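
/*
 * Rough example with made-up numbers: if holes inside dirty DATA segments
 * total 500 blocks, holes inside dirty NODE segments total 200, and the
 * overprovision area minus reserved segments covers 300 blocks, this
 * returns max(500, 200) - 300 = 200 unusable blocks.
 */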
912 
913 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
914 {
915         int ovp_hole_segs =
916                 (overprovision_segments(sbi) - reserved_segments(sbi));
917 
918         if (F2FS_OPTION(sbi).unusable_cap_perc == 100)
919                 return 0;
920         if (unusable > F2FS_OPTION(sbi).unusable_cap)
921                 return -EAGAIN;
922         if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
923                 dirty_segments(sbi) > ovp_hole_segs)
924                 return -EAGAIN;
925         if (has_not_enough_free_secs(sbi, 0, 0))
926                 return -EAGAIN;
927         return 0;
928 }
929 
930 /* This is only used by SBI_CP_DISABLED */
931 static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
932 {
933         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
934         unsigned int segno = 0;
935 
936         mutex_lock(&dirty_i->seglist_lock);
937         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
938                 if (get_valid_blocks(sbi, segno, false))
939                         continue;
940                 if (get_ckpt_valid_blocks(sbi, segno, false))
941                         continue;
942                 mutex_unlock(&dirty_i->seglist_lock);
943                 return segno;
944         }
945         mutex_unlock(&dirty_i->seglist_lock);
946         return NULL_SEGNO;
947 }
948 
949 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
950                 struct block_device *bdev, block_t lstart,
951                 block_t start, block_t len)
952 {
953         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
954         struct list_head *pend_list;
955         struct discard_cmd *dc;
956 
957         f2fs_bug_on(sbi, !len);
958 
959         pend_list = &dcc->pend_list[plist_idx(len)];
960 
961         dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
962         INIT_LIST_HEAD(&dc->list);
963         dc->bdev = bdev;
964         dc->di.lstart = lstart;
965         dc->di.start = start;
966         dc->di.len = len;
967         dc->ref = 0;
968         dc->state = D_PREP;
969         dc->queued = 0;
970         dc->error = 0;
971         init_completion(&dc->wait);
972         list_add_tail(&dc->list, pend_list);
973         spin_lock_init(&dc->lock);
974         dc->bio_ref = 0;
975         atomic_inc(&dcc->discard_cmd_cnt);
976         dcc->undiscard_blks += len;
977 
978         return dc;
979 }
980 
981 static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi)
982 {
983 #ifdef CONFIG_F2FS_CHECK_FS
984         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
985         struct rb_node *cur = rb_first_cached(&dcc->root), *next;
986         struct discard_cmd *cur_dc, *next_dc;
987 
988         while (cur) {
989                 next = rb_next(cur);
990                 if (!next)
991                         return true;
992 
993                 cur_dc = rb_entry(cur, struct discard_cmd, rb_node);
994                 next_dc = rb_entry(next, struct discard_cmd, rb_node);
995 
996                 if (cur_dc->di.lstart + cur_dc->di.len > next_dc->di.lstart) {
997                         f2fs_info(sbi, "broken discard_rbtree, "
998                                 "cur(%u, %u) next(%u, %u)",
999                                 cur_dc->di.lstart, cur_dc->di.len,
1000                                 next_dc->di.lstart, next_dc->di.len);
1001                         return false;
1002                 }
1003                 cur = next;
1004         }
1005 #endif
1006         return true;
1007 }
1008 
1009 static struct discard_cmd *__lookup_discard_cmd(struct f2fs_sb_info *sbi,
1010                                                 block_t blkaddr)
1011 {
1012         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1013         struct rb_node *node = dcc->root.rb_root.rb_node;
1014         struct discard_cmd *dc;
1015 
1016         while (node) {
1017                 dc = rb_entry(node, struct discard_cmd, rb_node);
1018 
1019                 if (blkaddr < dc->di.lstart)
1020                         node = node->rb_left;
1021                 else if (blkaddr >= dc->di.lstart + dc->di.len)
1022                         node = node->rb_right;
1023                 else
1024                         return dc;
1025         }
1026         return NULL;
1027 }
1028 
1029 static struct discard_cmd *__lookup_discard_cmd_ret(struct rb_root_cached *root,
1030                                 block_t blkaddr,
1031                                 struct discard_cmd **prev_entry,
1032                                 struct discard_cmd **next_entry,
1033                                 struct rb_node ***insert_p,
1034                                 struct rb_node **insert_parent)
1035 {
1036         struct rb_node **pnode = &root->rb_root.rb_node;
1037         struct rb_node *parent = NULL, *tmp_node;
1038         struct discard_cmd *dc;
1039 
1040         *insert_p = NULL;
1041         *insert_parent = NULL;
1042         *prev_entry = NULL;
1043         *next_entry = NULL;
1044 
1045         if (RB_EMPTY_ROOT(&root->rb_root))
1046                 return NULL;
1047 
1048         while (*pnode) {
1049                 parent = *pnode;
1050                 dc = rb_entry(*pnode, struct discard_cmd, rb_node);
1051 
1052                 if (blkaddr < dc->di.lstart)
1053                         pnode = &(*pnode)->rb_left;
1054                 else if (blkaddr >= dc->di.lstart + dc->di.len)
1055                         pnode = &(*pnode)->rb_right;
1056                 else
1057                         goto lookup_neighbors;
1058         }
1059 
1060         *insert_p = pnode;
1061         *insert_parent = parent;
1062 
1063         dc = rb_entry(parent, struct discard_cmd, rb_node);
1064         tmp_node = parent;
1065         if (parent && blkaddr > dc->di.lstart)
1066                 tmp_node = rb_next(parent);
1067         *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1068 
1069         tmp_node = parent;
1070         if (parent && blkaddr < dc->di.lstart)
1071                 tmp_node = rb_prev(parent);
1072         *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1073         return NULL;
1074 
1075 lookup_neighbors:
1076         /* lookup prev node for merging backward later */
1077         tmp_node = rb_prev(&dc->rb_node);
1078         *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1079 
1080         /* lookup next node for merging frontward later */
1081         tmp_node = rb_next(&dc->rb_node);
1082         *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1083         return dc;
1084 }
1085 
1086 static void __detach_discard_cmd(struct discard_cmd_control *dcc,
1087                                                         struct discard_cmd *dc)
1088 {
1089         if (dc->state == D_DONE)
1090                 atomic_sub(dc->queued, &dcc->queued_discard);
1091 
1092         list_del(&dc->list);
1093         rb_erase_cached(&dc->rb_node, &dcc->root);
1094         dcc->undiscard_blks -= dc->di.len;
1095 
1096         kmem_cache_free(discard_cmd_slab, dc);
1097 
1098         atomic_dec(&dcc->discard_cmd_cnt);
1099 }
1100 
1101 static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
1102                                                         struct discard_cmd *dc)
1103 {
1104         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1105         unsigned long flags;
1106 
1107         trace_f2fs_remove_discard(dc->bdev, dc->di.start, dc->di.len);
1108 
1109         spin_lock_irqsave(&dc->lock, flags);
1110         if (dc->bio_ref) {
1111                 spin_unlock_irqrestore(&dc->lock, flags);
1112                 return;
1113         }
1114         spin_unlock_irqrestore(&dc->lock, flags);
1115 
1116         f2fs_bug_on(sbi, dc->ref);
1117 
1118         if (dc->error == -EOPNOTSUPP)
1119                 dc->error = 0;
1120 
1121         if (dc->error)
1122                 f2fs_info_ratelimited(sbi,
1123                         "Issue discard(%u, %u, %u) failed, ret: %d",
1124                         dc->di.lstart, dc->di.start, dc->di.len, dc->error);
1125         __detach_discard_cmd(dcc, dc);
1126 }
1127 
1128 static void f2fs_submit_discard_endio(struct bio *bio)
1129 {
1130         struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1131         unsigned long flags;
1132 
1133         spin_lock_irqsave(&dc->lock, flags);
1134         if (!dc->error)
1135                 dc->error = blk_status_to_errno(bio->bi_status);
1136         dc->bio_ref--;
1137         if (!dc->bio_ref && dc->state == D_SUBMIT) {
1138                 dc->state = D_DONE;
1139                 complete_all(&dc->wait);
1140         }
1141         spin_unlock_irqrestore(&dc->lock, flags);
1142         bio_put(bio);
1143 }
1144 
1145 static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1146                                 block_t start, block_t end)
1147 {
1148 #ifdef CONFIG_F2FS_CHECK_FS
1149         struct seg_entry *sentry;
1150         unsigned int segno;
1151         block_t blk = start;
1152         unsigned long offset, size, *map;
1153 
1154         while (blk < end) {
1155                 segno = GET_SEGNO(sbi, blk);
1156                 sentry = get_seg_entry(sbi, segno);
1157                 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1158 
1159                 if (end < START_BLOCK(sbi, segno + 1))
1160                         size = GET_BLKOFF_FROM_SEG0(sbi, end);
1161                 else
1162                         size = BLKS_PER_SEG(sbi);
1163                 map = (unsigned long *)(sentry->cur_valid_map);
1164                 offset = __find_rev_next_bit(map, size, offset);
1165                 f2fs_bug_on(sbi, offset != size);
1166                 blk = START_BLOCK(sbi, segno + 1);
1167         }
1168 #endif
1169 }
1170 
1171 static void __init_discard_policy(struct f2fs_sb_info *sbi,
1172                                 struct discard_policy *dpolicy,
1173                                 int discard_type, unsigned int granularity)
1174 {
1175         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1176 
1177         /* common policy */
1178         dpolicy->type = discard_type;
1179         dpolicy->sync = true;
1180         dpolicy->ordered = false;
1181         dpolicy->granularity = granularity;
1182 
1183         dpolicy->max_requests = dcc->max_discard_request;
1184         dpolicy->io_aware_gran = dcc->discard_io_aware_gran;
1185         dpolicy->timeout = false;
1186 
1187         if (discard_type == DPOLICY_BG) {
1188                 dpolicy->min_interval = dcc->min_discard_issue_time;
1189                 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1190                 dpolicy->max_interval = dcc->max_discard_issue_time;
1191                 if (dcc->discard_io_aware == DPOLICY_IO_AWARE_ENABLE)
1192                         dpolicy->io_aware = true;
1193                 else if (dcc->discard_io_aware == DPOLICY_IO_AWARE_DISABLE)
1194                         dpolicy->io_aware = false;
1195                 dpolicy->sync = false;
1196                 dpolicy->ordered = true;
1197                 if (utilization(sbi) > dcc->discard_urgent_util) {
1198                         dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1199                         if (atomic_read(&dcc->discard_cmd_cnt))
1200                                 dpolicy->max_interval =
1201                                         dcc->min_discard_issue_time;
1202                 }
1203         } else if (discard_type == DPOLICY_FORCE) {
1204                 dpolicy->min_interval = dcc->min_discard_issue_time;
1205                 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1206                 dpolicy->max_interval = dcc->max_discard_issue_time;
1207                 dpolicy->io_aware = false;
1208         } else if (discard_type == DPOLICY_FSTRIM) {
1209                 dpolicy->io_aware = false;
1210         } else if (discard_type == DPOLICY_UMOUNT) {
1211                 dpolicy->io_aware = false;
1212                 /* we need to issue all to keep CP_TRIMMED_FLAG */
1213                 dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1214                 dpolicy->timeout = true;
1215         }
1216 }
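
/*
 * Summary of the policies set up above: DPOLICY_BG issues discards
 * asynchronously in address order and, depending on discard_io_aware,
 * backs off while other IO is in flight, dropping to the minimum
 * granularity once utilization exceeds discard_urgent_util;
 * DPOLICY_FORCE and DPOLICY_FSTRIM ignore in-flight IO; DPOLICY_UMOUNT
 * also uses the minimum granularity plus a timeout so that everything
 * pending is issued before CP_TRIMMED_FLAG is set.
 */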
1217 
1218 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1219                                 struct block_device *bdev, block_t lstart,
1220                                 block_t start, block_t len);
1221 
1222 #ifdef CONFIG_BLK_DEV_ZONED
1223 static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi,
1224                                    struct discard_cmd *dc, blk_opf_t flag,
1225                                    struct list_head *wait_list,
1226                                    unsigned int *issued)
1227 {
1228         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1229         struct block_device *bdev = dc->bdev;
1230         struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS);
1231         unsigned long flags;
1232 
1233         trace_f2fs_issue_reset_zone(bdev, dc->di.start);
1234 
1235         spin_lock_irqsave(&dc->lock, flags);
1236         dc->state = D_SUBMIT;
1237         dc->bio_ref++;
1238         spin_unlock_irqrestore(&dc->lock, flags);
1239 
1240         if (issued)
1241                 (*issued)++;
1242 
1243         atomic_inc(&dcc->queued_discard);
1244         dc->queued++;
1245         list_move_tail(&dc->list, wait_list);
1246 
1247         /* sanity check on discard range */
1248         __check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len);
1249 
1250         bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start);
1251         bio->bi_private = dc;
1252         bio->bi_end_io = f2fs_submit_discard_endio;
1253         submit_bio(bio);
1254 
1255         atomic_inc(&dcc->issued_discard);
1256         f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE);
1257 }
1258 #endif
1259 
1260 /* this function is copied from blkdev_issue_discard in block/blk-lib.c */
1261 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1262                                 struct discard_policy *dpolicy,
1263                                 struct discard_cmd *dc, int *issued)
1264 {
1265         struct block_device *bdev = dc->bdev;
1266         unsigned int max_discard_blocks =
1267                         SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1268         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1269         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1270                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1271         blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
1272         block_t lstart, start, len, total_len;
1273         int err = 0;
1274 
1275         if (dc->state != D_PREP)
1276                 return 0;
1277 
1278         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1279                 return 0;
1280 
1281 #ifdef CONFIG_BLK_DEV_ZONED
1282         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) {
1283                 int devi = f2fs_bdev_index(sbi, bdev);
1284 
1285                 if (devi < 0)
1286                         return -EINVAL;
1287 
1288                 if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
1289                         __submit_zone_reset_cmd(sbi, dc, flag,
1290                                                 wait_list, issued);
1291                         return 0;
1292                 }
1293 
1294                 /*
1295                  * Issue discard for conventional zones only if the device
1296                  * supports discard.
1297                  */
1298                 if (!bdev_max_discard_sectors(bdev))
1299                         return -EOPNOTSUPP;
1300         }
1301 #endif
1302 
1303         trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);
1304 
1305         lstart = dc->di.lstart;
1306         start = dc->di.start;
1307         len = dc->di.len;
1308         total_len = len;
1309 
1310         dc->di.len = 0;
1311 
1312         while (total_len && *issued < dpolicy->max_requests && !err) {
1313                 struct bio *bio = NULL;
1314                 unsigned long flags;
1315                 bool last = true;
1316 
1317                 if (len > max_discard_blocks) {
1318                         len = max_discard_blocks;
1319                         last = false;
1320                 }
1321 
1322                 (*issued)++;
1323                 if (*issued == dpolicy->max_requests)
1324                         last = true;
1325 
1326                 dc->di.len += len;
1327 
1328                 if (time_to_inject(sbi, FAULT_DISCARD)) {
1329                         err = -EIO;
1330                 } else {
1331                         err = __blkdev_issue_discard(bdev,
1332                                         SECTOR_FROM_BLOCK(start),
1333                                         SECTOR_FROM_BLOCK(len),
1334                                         GFP_NOFS, &bio);
1335                 }
1336                 if (err) {
1337                         spin_lock_irqsave(&dc->lock, flags);
1338                         if (dc->state == D_PARTIAL)
1339                                 dc->state = D_SUBMIT;
1340                         spin_unlock_irqrestore(&dc->lock, flags);
1341 
1342                         break;
1343                 }
1344 
1345                 f2fs_bug_on(sbi, !bio);
1346 
1347                 /*
1348                  * must be set before submission so that the endio handler
1349                  * does not mark the command D_DONE right away
1350                  */
1351                 spin_lock_irqsave(&dc->lock, flags);
1352                 if (last)
1353                         dc->state = D_SUBMIT;
1354                 else
1355                         dc->state = D_PARTIAL;
1356                 dc->bio_ref++;
1357                 spin_unlock_irqrestore(&dc->lock, flags);
1358 
1359                 atomic_inc(&dcc->queued_discard);
1360                 dc->queued++;
1361                 list_move_tail(&dc->list, wait_list);
1362 
1363                 /* sanity check on discard range */
1364                 __check_sit_bitmap(sbi, lstart, lstart + len);
1365 
1366                 bio->bi_private = dc;
1367                 bio->bi_end_io = f2fs_submit_discard_endio;
1368                 bio->bi_opf |= flag;
1369                 submit_bio(bio);
1370 
1371                 atomic_inc(&dcc->issued_discard);
1372 
1373                 f2fs_update_iostat(sbi, NULL, FS_DISCARD_IO, len * F2FS_BLKSIZE);
1374 
1375                 lstart += len;
1376                 start += len;
1377                 total_len -= len;
1378                 len = total_len;
1379         }
1380 
1381         if (!err && len) {
1382                 dcc->undiscard_blks -= len;
1383                 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1384         }
1385         return err;
1386 }
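
/*
 * Illustration: a pending discard larger than max_discard_blocks is split
 * into several bios; intermediate chunks leave the command in D_PARTIAL,
 * the final chunk (or hitting dpolicy->max_requests) marks it D_SUBMIT,
 * and any tail that was not issued is put back into the discard tree by
 * __update_discard_tree_range().
 */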
1387 
1388 static void __insert_discard_cmd(struct f2fs_sb_info *sbi,
1389                                 struct block_device *bdev, block_t lstart,
1390                                 block_t start, block_t len)
1391 {
1392         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1393         struct rb_node **p = &dcc->root.rb_root.rb_node;
1394         struct rb_node *parent = NULL;
1395         struct discard_cmd *dc;
1396         bool leftmost = true;
1397 
1398         /* look up rb tree to find parent node */
1399         while (*p) {
1400                 parent = *p;
1401                 dc = rb_entry(parent, struct discard_cmd, rb_node);
1402 
1403                 if (lstart < dc->di.lstart) {
1404                         p = &(*p)->rb_left;
1405                 } else if (lstart >= dc->di.lstart + dc->di.len) {
1406                         p = &(*p)->rb_right;
1407                         leftmost = false;
1408                 } else {
1409                         /* skip adding it if it already exists */
1410                         return;
1411                 }
1412         }
1413 
1414         dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
1415 
1416         rb_link_node(&dc->rb_node, parent, p);
1417         rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
1418 }
1419 
1420 static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1421                                                 struct discard_cmd *dc)
1422 {
1423         list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->di.len)]);
1424 }
1425 
1426 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1427                                 struct discard_cmd *dc, block_t blkaddr)
1428 {
1429         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1430         struct discard_info di = dc->di;
1431         bool modified = false;
1432 
1433         if (dc->state == D_DONE || dc->di.len == 1) {
1434                 __remove_discard_cmd(sbi, dc);
1435                 return;
1436         }
1437 
1438         dcc->undiscard_blks -= di.len;
1439 
1440         if (blkaddr > di.lstart) {
1441                 dc->di.len = blkaddr - dc->di.lstart;
1442                 dcc->undiscard_blks += dc->di.len;
1443                 __relocate_discard_cmd(dcc, dc);
1444                 modified = true;
1445         }
1446 
1447         if (blkaddr < di.lstart + di.len - 1) {
1448                 if (modified) {
1449                         __insert_discard_cmd(sbi, dc->bdev, blkaddr + 1,
1450                                         di.start + blkaddr + 1 - di.lstart,
1451                                         di.lstart + di.len - 1 - blkaddr);
1452                 } else {
1453                         dc->di.lstart++;
1454                         dc->di.len--;
1455                         dc->di.start++;
1456                         dcc->undiscard_blks += dc->di.len;
1457                         __relocate_discard_cmd(dcc, dc);
1458                 }
1459         }
1460 }
1461 
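/*
 * Add the range [lstart, lstart + len) to the discard rbtree, merging it
 * with adjacent D_PREP commands on the same device as long as the merged
 * command stays within the device's maximum discard size.
 */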
1462 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1463                                 struct block_device *bdev, block_t lstart,
1464                                 block_t start, block_t len)
1465 {
1466         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1467         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1468         struct discard_cmd *dc;
1469         struct discard_info di = {0};
1470         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1471         unsigned int max_discard_blocks =
1472                         SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1473         block_t end = lstart + len;
1474 
1475         dc = __lookup_discard_cmd_ret(&dcc->root, lstart,
1476                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
1477         if (dc)
1478                 prev_dc = dc;
1479 
1480         if (!prev_dc) {
1481                 di.lstart = lstart;
1482                 di.len = next_dc ? next_dc->di.lstart - lstart : len;
1483                 di.len = min(di.len, len);
1484                 di.start = start;
1485         }
1486 
1487         while (1) {
1488                 struct rb_node *node;
1489                 bool merged = false;
1490                 struct discard_cmd *tdc = NULL;
1491 
1492                 if (prev_dc) {
1493                         di.lstart = prev_dc->di.lstart + prev_dc->di.len;
1494                         if (di.lstart < lstart)
1495                                 di.lstart = lstart;
1496                         if (di.lstart >= end)
1497                                 break;
1498 
1499                         if (!next_dc || next_dc->di.lstart > end)
1500                                 di.len = end - di.lstart;
1501                         else
1502                                 di.len = next_dc->di.lstart - di.lstart;
1503                         di.start = start + di.lstart - lstart;
1504                 }
1505 
1506                 if (!di.len)
1507                         goto next;
1508 
1509                 if (prev_dc && prev_dc->state == D_PREP &&
1510                         prev_dc->bdev == bdev &&
1511                         __is_discard_back_mergeable(&di, &prev_dc->di,
1512                                                         max_discard_blocks)) {
1513                         prev_dc->di.len += di.len;
1514                         dcc->undiscard_blks += di.len;
1515                         __relocate_discard_cmd(dcc, prev_dc);
1516                         di = prev_dc->di;
1517                         tdc = prev_dc;
1518                         merged = true;
1519                 }
1520 
1521                 if (next_dc && next_dc->state == D_PREP &&
1522                         next_dc->bdev == bdev &&
1523                         __is_discard_front_mergeable(&di, &next_dc->di,
1524                                                         max_discard_blocks)) {
1525                         next_dc->di.lstart = di.lstart;
1526                         next_dc->di.len += di.len;
1527                         next_dc->di.start = di.start;
1528                         dcc->undiscard_blks += di.len;
1529                         __relocate_discard_cmd(dcc, next_dc);
1530                         if (tdc)
1531                                 __remove_discard_cmd(sbi, tdc);
1532                         merged = true;
1533                 }
1534 
1535                 if (!merged)
1536                         __insert_discard_cmd(sbi, bdev,
1537                                                 di.lstart, di.start, di.len);
1538  next:
1539                 prev_dc = next_dc;
1540                 if (!prev_dc)
1541                         break;
1542 
1543                 node = rb_next(&prev_dc->rb_node);
1544                 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1545         }
1546 }
1547 
1548 #ifdef CONFIG_BLK_DEV_ZONED
1549 static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi,
1550                 struct block_device *bdev, block_t blkstart, block_t lblkstart,
1551                 block_t blklen)
1552 {
1553         trace_f2fs_queue_reset_zone(bdev, blkstart);
1554 
1555         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1556         __insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen);
1557         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1558 }
1559 #endif
1560 
1561 static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
1562                 struct block_device *bdev, block_t blkstart, block_t blklen)
1563 {
1564         block_t lblkstart = blkstart;
1565 
1566         if (!f2fs_bdev_support_discard(bdev))
1567                 return;
1568 
1569         trace_f2fs_queue_discard(bdev, blkstart, blklen);
1570 
1571         if (f2fs_is_multi_device(sbi)) {
1572                 int devi = f2fs_target_device_index(sbi, blkstart);
1573 
1574                 blkstart -= FDEV(devi).start_blk;
1575         }
1576         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1577         __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1578         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1579 }
1580 
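/*
 * Issue pending discard commands in logical block order, resuming from
 * dcc->next_pos so repeated calls walk the whole address space.  Stops
 * early once the policy's request budget is used up or I/O-aware
 * throttling finds the device busy.
 */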
1581 static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1582                 struct discard_policy *dpolicy, int *issued)
1583 {
1584         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1585         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1586         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1587         struct discard_cmd *dc;
1588         struct blk_plug plug;
1589         bool io_interrupted = false;
1590 
1591         mutex_lock(&dcc->cmd_lock);
1592         dc = __lookup_discard_cmd_ret(&dcc->root, dcc->next_pos,
1593                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
1594         if (!dc)
1595                 dc = next_dc;
1596 
1597         blk_start_plug(&plug);
1598 
1599         while (dc) {
1600                 struct rb_node *node;
1601                 int err = 0;
1602 
1603                 if (dc->state != D_PREP)
1604                         goto next;
1605 
1606                 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1607                         io_interrupted = true;
1608                         break;
1609                 }
1610 
1611                 dcc->next_pos = dc->di.lstart + dc->di.len;
1612                 err = __submit_discard_cmd(sbi, dpolicy, dc, issued);
1613 
1614                 if (*issued >= dpolicy->max_requests)
1615                         break;
1616 next:
1617                 node = rb_next(&dc->rb_node);
1618                 if (err)
1619                         __remove_discard_cmd(sbi, dc);
1620                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1621         }
1622 
1623         blk_finish_plug(&plug);
1624 
1625         if (!dc)
1626                 dcc->next_pos = 0;
1627 
1628         mutex_unlock(&dcc->cmd_lock);
1629 
1630         if (!(*issued) && io_interrupted)
1631                 *issued = -1;
1632 }
1633 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1634                                         struct discard_policy *dpolicy);
1635 
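/*
 * Walk the pending lists from the largest requests downwards and submit
 * commands according to @dpolicy: lists below the policy granularity are
 * skipped, ordered policies hand off to __issue_discard_cmd_orderly(),
 * and DPOLICY_UMOUNT keeps retrying until the queue drains or the umount
 * timeout expires.
 */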
1636 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1637                                         struct discard_policy *dpolicy)
1638 {
1639         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1640         struct list_head *pend_list;
1641         struct discard_cmd *dc, *tmp;
1642         struct blk_plug plug;
1643         int i, issued;
1644         bool io_interrupted = false;
1645 
1646         if (dpolicy->timeout)
1647                 f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
1648 
1649 retry:
1650         issued = 0;
1651         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1652                 if (dpolicy->timeout &&
1653                                 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1654                         break;
1655 
1656                 if (i + 1 < dpolicy->granularity)
1657                         break;
1658 
1659                 if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) {
1660                         __issue_discard_cmd_orderly(sbi, dpolicy, &issued);
1661                         return issued;
1662                 }
1663 
1664                 pend_list = &dcc->pend_list[i];
1665 
1666                 mutex_lock(&dcc->cmd_lock);
1667                 if (list_empty(pend_list))
1668                         goto next;
1669                 if (unlikely(dcc->rbtree_check))
1670                         f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
1671                 blk_start_plug(&plug);
1672                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1673                         f2fs_bug_on(sbi, dc->state != D_PREP);
1674 
1675                         if (dpolicy->timeout &&
1676                                 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1677                                 break;
1678 
1679                         if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1680                                                 !is_idle(sbi, DISCARD_TIME)) {
1681                                 io_interrupted = true;
1682                                 break;
1683                         }
1684 
1685                         __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1686 
1687                         if (issued >= dpolicy->max_requests)
1688                                 break;
1689                 }
1690                 blk_finish_plug(&plug);
1691 next:
1692                 mutex_unlock(&dcc->cmd_lock);
1693 
1694                 if (issued >= dpolicy->max_requests || io_interrupted)
1695                         break;
1696         }
1697 
1698         if (dpolicy->type == DPOLICY_UMOUNT && issued) {
1699                 __wait_all_discard_cmd(sbi, dpolicy);
1700                 goto retry;
1701         }
1702 
1703         if (!issued && io_interrupted)
1704                 issued = -1;
1705 
1706         return issued;
1707 }
1708 
1709 static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1710 {
1711         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1712         struct list_head *pend_list;
1713         struct discard_cmd *dc, *tmp;
1714         int i;
1715         bool dropped = false;
1716 
1717         mutex_lock(&dcc->cmd_lock);
1718         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1719                 pend_list = &dcc->pend_list[i];
1720                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1721                         f2fs_bug_on(sbi, dc->state != D_PREP);
1722                         __remove_discard_cmd(sbi, dc);
1723                         dropped = true;
1724                 }
1725         }
1726         mutex_unlock(&dcc->cmd_lock);
1727 
1728         return dropped;
1729 }
1730 
1731 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1732 {
1733         __drop_discard_cmd(sbi);
1734 }
1735 
1736 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1737                                                         struct discard_cmd *dc)
1738 {
1739         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1740         unsigned int len = 0;
1741 
1742         wait_for_completion_io(&dc->wait);
1743         mutex_lock(&dcc->cmd_lock);
1744         f2fs_bug_on(sbi, dc->state != D_DONE);
1745         dc->ref--;
1746         if (!dc->ref) {
1747                 if (!dc->error)
1748                         len = dc->di.len;
1749                 __remove_discard_cmd(sbi, dc);
1750         }
1751         mutex_unlock(&dcc->cmd_lock);
1752 
1753         return len;
1754 }
1755 
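/*
 * Wait for issued discard commands overlapping [start, end) that meet the
 * policy granularity, free the completed ones, and return the number of
 * blocks trimmed without error.
 */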
1756 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1757                                                 struct discard_policy *dpolicy,
1758                                                 block_t start, block_t end)
1759 {
1760         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1761         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1762                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1763         struct discard_cmd *dc = NULL, *iter, *tmp;
1764         unsigned int trimmed = 0;
1765 
1766 next:
1767         dc = NULL;
1768 
1769         mutex_lock(&dcc->cmd_lock);
1770         list_for_each_entry_safe(iter, tmp, wait_list, list) {
1771                 if (iter->di.lstart + iter->di.len <= start ||
1772                                         end <= iter->di.lstart)
1773                         continue;
1774                 if (iter->di.len < dpolicy->granularity)
1775                         continue;
1776                 if (iter->state == D_DONE && !iter->ref) {
1777                         wait_for_completion_io(&iter->wait);
1778                         if (!iter->error)
1779                                 trimmed += iter->di.len;
1780                         __remove_discard_cmd(sbi, iter);
1781                 } else {
1782                         iter->ref++;
1783                         dc = iter;
1784                         break;
1785                 }
1786         }
1787         mutex_unlock(&dcc->cmd_lock);
1788 
1789         if (dc) {
1790                 trimmed += __wait_one_discard_bio(sbi, dc);
1791                 goto next;
1792         }
1793 
1794         return trimmed;
1795 }
1796 
1797 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1798                                                 struct discard_policy *dpolicy)
1799 {
1800         struct discard_policy dp;
1801         unsigned int discard_blks;
1802 
1803         if (dpolicy)
1804                 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1805 
1806         /* wait all */
1807         __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, MIN_DISCARD_GRANULARITY);
1808         discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1809         __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, MIN_DISCARD_GRANULARITY);
1810         discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1811 
1812         return discard_blks;
1813 }
1814 
1815 /* This should be covered by the global mutex, &sit_i->sentry_lock */
1816 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1817 {
1818         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1819         struct discard_cmd *dc;
1820         bool need_wait = false;
1821 
1822         mutex_lock(&dcc->cmd_lock);
1823         dc = __lookup_discard_cmd(sbi, blkaddr);
1824 #ifdef CONFIG_BLK_DEV_ZONED
1825         if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) {
1826                 int devi = f2fs_bdev_index(sbi, dc->bdev);
1827 
1828                 if (devi < 0) {
1829                         mutex_unlock(&dcc->cmd_lock);
1830                         return;
1831                 }
1832 
1833                 if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
1834                         /* force submit zone reset */
1835                         if (dc->state == D_PREP)
1836                                 __submit_zone_reset_cmd(sbi, dc, REQ_SYNC,
1837                                                         &dcc->wait_list, NULL);
1838                         dc->ref++;
1839                         mutex_unlock(&dcc->cmd_lock);
1840                         /* wait zone reset */
1841                         __wait_one_discard_bio(sbi, dc);
1842                         return;
1843                 }
1844         }
1845 #endif
1846         if (dc) {
1847                 if (dc->state == D_PREP) {
1848                         __punch_discard_cmd(sbi, dc, blkaddr);
1849                 } else {
1850                         dc->ref++;
1851                         need_wait = true;
1852                 }
1853         }
1854         mutex_unlock(&dcc->cmd_lock);
1855 
1856         if (need_wait)
1857                 __wait_one_discard_bio(sbi, dc);
1858 }
1859 
1860 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1861 {
1862         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1863 
1864         if (dcc && dcc->f2fs_issue_discard) {
1865                 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1866 
1867                 dcc->f2fs_issue_discard = NULL;
1868                 kthread_stop(discard_thread);
1869         }
1870 }
1871 
1872 /**
1873  * f2fs_issue_discard_timeout() - Issue all discard cmd within UMOUNT_DISCARD_TIMEOUT
1874  * @sbi: the f2fs_sb_info data for discard cmd to issue
1875  *
1876  * When UMOUNT_DISCARD_TIMEOUT is exceeded, all remaining discard commands will be dropped.
1877  *
1878  * Return: true if all discard commands were issued or none needed issuing, false otherwise.
1879  */
1880 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1881 {
1882         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1883         struct discard_policy dpolicy;
1884         bool dropped;
1885 
1886         if (!atomic_read(&dcc->discard_cmd_cnt))
1887                 return true;
1888 
1889         __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1890                                         dcc->discard_granularity);
1891         __issue_discard_cmd(sbi, &dpolicy);
1892         dropped = __drop_discard_cmd(sbi);
1893 
1894         /* just to make sure there are no pending discard commands */
1895         __wait_all_discard_cmd(sbi, NULL);
1896 
1897         f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1898         return !dropped;
1899 }
1900 
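/*
 * Background thread that periodically issues queued discard commands.
 * It is woken by discard_wake or by timeout, and the sleep interval
 * adapts to the outcome of each pass: shorter after successful
 * submissions, longer when the queue is empty or the device is busy.
 */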
1901 static int issue_discard_thread(void *data)
1902 {
1903         struct f2fs_sb_info *sbi = data;
1904         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1905         wait_queue_head_t *q = &dcc->discard_wait_queue;
1906         struct discard_policy dpolicy;
1907         unsigned int wait_ms = dcc->min_discard_issue_time;
1908         int issued;
1909 
1910         set_freezable();
1911 
1912         do {
1913                 wait_event_freezable_timeout(*q,
1914                                 kthread_should_stop() || dcc->discard_wake,
1915                                 msecs_to_jiffies(wait_ms));
1916 
1917                 if (sbi->gc_mode == GC_URGENT_HIGH ||
1918                         !f2fs_available_free_memory(sbi, DISCARD_CACHE))
1919                         __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE,
1920                                                 MIN_DISCARD_GRANULARITY);
1921                 else
1922                         __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1923                                                 dcc->discard_granularity);
1924 
1925                 if (dcc->discard_wake)
1926                         dcc->discard_wake = false;
1927 
1928                 /* clean up pending candidates before going to sleep */
1929                 if (atomic_read(&dcc->queued_discard))
1930                         __wait_all_discard_cmd(sbi, NULL);
1931 
1932                 if (f2fs_readonly(sbi->sb))
1933                         continue;
1934                 if (kthread_should_stop())
1935                         return 0;
1936                 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
1937                         !atomic_read(&dcc->discard_cmd_cnt)) {
1938                         wait_ms = dpolicy.max_interval;
1939                         continue;
1940                 }
1941 
1942                 sb_start_intwrite(sbi->sb);
1943 
1944                 issued = __issue_discard_cmd(sbi, &dpolicy);
1945                 if (issued > 0) {
1946                         __wait_all_discard_cmd(sbi, &dpolicy);
1947                         wait_ms = dpolicy.min_interval;
1948                 } else if (issued == -1) {
1949                         wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1950                         if (!wait_ms)
1951                                 wait_ms = dpolicy.mid_interval;
1952                 } else {
1953                         wait_ms = dpolicy.max_interval;
1954                 }
1955                 if (!atomic_read(&dcc->discard_cmd_cnt))
1956                         wait_ms = dpolicy.max_interval;
1957 
1958                 sb_end_intwrite(sbi->sb);
1959 
1960         } while (!kthread_should_stop());
1961         return 0;
1962 }
1963 
1964 #ifdef CONFIG_BLK_DEV_ZONED
1965 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1966                 struct block_device *bdev, block_t blkstart, block_t blklen)
1967 {
1968         sector_t sector, nr_sects;
1969         block_t lblkstart = blkstart;
1970         int devi = 0;
1971         u64 remainder = 0;
1972 
1973         if (f2fs_is_multi_device(sbi)) {
1974                 devi = f2fs_target_device_index(sbi, blkstart);
1975                 if (blkstart < FDEV(devi).start_blk ||
1976                     blkstart > FDEV(devi).end_blk) {
1977                         f2fs_err(sbi, "Invalid block %x", blkstart);
1978                         return -EIO;
1979                 }
1980                 blkstart -= FDEV(devi).start_blk;
1981         }
1982 
1983         /* For sequential zones, reset the zone write pointer */
1984         if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1985                 sector = SECTOR_FROM_BLOCK(blkstart);
1986                 nr_sects = SECTOR_FROM_BLOCK(blklen);
1987                 div64_u64_rem(sector, bdev_zone_sectors(bdev), &remainder);
1988 
1989                 if (remainder || nr_sects != bdev_zone_sectors(bdev)) {
1990                         f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1991                                  devi, sbi->s_ndevs ? FDEV(devi).path : "",
1992                                  blkstart, blklen);
1993                         return -EIO;
1994                 }
1995 
1996                 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
1997                         unsigned int nofs_flags;
1998                         int ret;
1999 
2000                         trace_f2fs_issue_reset_zone(bdev, blkstart);
2001                         nofs_flags = memalloc_nofs_save();
2002                         ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
2003                                                 sector, nr_sects);
2004                         memalloc_nofs_restore(nofs_flags);
2005                         return ret;
2006                 }
2007 
2008                 __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
2009                 return 0;
2010         }
2011 
2012         /* For conventional zones, use regular discard if supported */
2013         __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
2014         return 0;
2015 }
2016 #endif
2017 
2018 static int __issue_discard_async(struct f2fs_sb_info *sbi,
2019                 struct block_device *bdev, block_t blkstart, block_t blklen)
2020 {
2021 #ifdef CONFIG_BLK_DEV_ZONED
2022         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
2023                 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
2024 #endif
2025         __queue_discard_cmd(sbi, bdev, blkstart, blklen);
2026         return 0;
2027 }
2028 
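/*
 * Split [blkstart, blkstart + blklen) at device boundaries on multi-device
 * setups, mark the blocks in the per-segment discard map, and queue an
 * asynchronous discard (or zone reset) for each piece.
 */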
2029 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
2030                                 block_t blkstart, block_t blklen)
2031 {
2032         sector_t start = blkstart, len = 0;
2033         struct block_device *bdev;
2034         struct seg_entry *se;
2035         unsigned int offset;
2036         block_t i;
2037         int err = 0;
2038 
2039         bdev = f2fs_target_device(sbi, blkstart, NULL);
2040 
2041         for (i = blkstart; i < blkstart + blklen; i++, len++) {
2042                 if (i != start) {
2043                         struct block_device *bdev2 =
2044                                 f2fs_target_device(sbi, i, NULL);
2045 
2046                         if (bdev2 != bdev) {
2047                                 err = __issue_discard_async(sbi, bdev,
2048                                                 start, len);
2049                                 if (err)
2050                                         return err;
2051                                 bdev = bdev2;
2052                                 start = i;
2053                                 len = 0;
2054                         }
2055                 }
2056 
2057                 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
2058                 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
2059 
2060                 if (f2fs_block_unit_discard(sbi) &&
2061                                 !f2fs_test_and_set_bit(offset, se->discard_map))
2062                         sbi->discard_blks--;
2063         }
2064 
2065         if (len)
2066                 err = __issue_discard_async(sbi, bdev, start, len);
2067         return err;
2068 }
2069 
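/*
 * Collect discard candidates for the segment given by cpc->trim_start.
 * With @check_only set, just report whether any candidate exists;
 * otherwise record the ranges on dcc->entry_list for
 * f2fs_clear_prefree_segments() to issue later.
 */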
2070 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
2071                                                         bool check_only)
2072 {
2073         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2074         struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
2075         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2076         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2077         unsigned long *discard_map = (unsigned long *)se->discard_map;
2078         unsigned long *dmap = SIT_I(sbi)->tmp_map;
2079         unsigned int start = 0, end = -1;
2080         bool force = (cpc->reason & CP_DISCARD);
2081         struct discard_entry *de = NULL;
2082         struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
2083         int i;
2084 
2085         if (se->valid_blocks == BLKS_PER_SEG(sbi) ||
2086             !f2fs_hw_support_discard(sbi) ||
2087             !f2fs_block_unit_discard(sbi))
2088                 return false;
2089 
2090         if (!force) {
2091                 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
2092                         SM_I(sbi)->dcc_info->nr_discards >=
2093                                 SM_I(sbi)->dcc_info->max_discards)
2094                         return false;
2095         }
2096 
2097         /* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
2098         for (i = 0; i < entries; i++)
2099                 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
2100                                 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
2101 
2102         while (force || SM_I(sbi)->dcc_info->nr_discards <=
2103                                 SM_I(sbi)->dcc_info->max_discards) {
2104                 start = __find_rev_next_bit(dmap, BLKS_PER_SEG(sbi), end + 1);
2105                 if (start >= BLKS_PER_SEG(sbi))
2106                         break;
2107 
2108                 end = __find_rev_next_zero_bit(dmap,
2109                                                 BLKS_PER_SEG(sbi), start + 1);
2110                 if (force && start && end != BLKS_PER_SEG(sbi) &&
2111                     (end - start) < cpc->trim_minlen)
2112                         continue;
2113 
2114                 if (check_only)
2115                         return true;
2116 
2117                 if (!de) {
2118                         de = f2fs_kmem_cache_alloc(discard_entry_slab,
2119                                                 GFP_F2FS_ZERO, true, NULL);
2120                         de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
2121                         list_add_tail(&de->list, head);
2122                 }
2123 
2124                 for (i = start; i < end; i++)
2125                         __set_bit_le(i, (void *)de->discard_map);
2126 
2127                 SM_I(sbi)->dcc_info->nr_discards += end - start;
2128         }
2129         return false;
2130 }
2131 
2132 static void release_discard_addr(struct discard_entry *entry)
2133 {
2134         list_del(&entry->list);
2135         kmem_cache_free(discard_entry_slab, entry);
2136 }
2137 
2138 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
2139 {
2140         struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
2141         struct discard_entry *entry, *this;
2142 
2143         /* drop caches */
2144         list_for_each_entry_safe(entry, this, head, list)
2145                 release_discard_addr(entry);
2146 }
2147 
2148 /*
2149  * Should call f2fs_clear_prefree_segments after checkpoint is done.
2150  */
2151 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
2152 {
2153         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2154         unsigned int segno;
2155 
2156         mutex_lock(&dirty_i->seglist_lock);
2157         for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
2158                 __set_test_and_free(sbi, segno, false);
2159         mutex_unlock(&dirty_i->seglist_lock);
2160 }
2161 
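/*
 * Drop the prefree state recorded during checkpoint and issue the matching
 * discards: whole sections on zoned or LFS large-section layouts, whole
 * segment runs otherwise, plus the small per-block discards queued by
 * add_discard_addrs().
 */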
2162 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
2163                                                 struct cp_control *cpc)
2164 {
2165         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2166         struct list_head *head = &dcc->entry_list;
2167         struct discard_entry *entry, *this;
2168         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2169         unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
2170         unsigned int start = 0, end = -1;
2171         unsigned int secno, start_segno;
2172         bool force = (cpc->reason & CP_DISCARD);
2173         bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
2174                                                 DISCARD_UNIT_SECTION;
2175 
2176         if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
2177                 section_alignment = true;
2178 
2179         mutex_lock(&dirty_i->seglist_lock);
2180 
2181         while (1) {
2182                 int i;
2183 
2184                 if (section_alignment && end != -1)
2185                         end--;
2186                 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
2187                 if (start >= MAIN_SEGS(sbi))
2188                         break;
2189                 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
2190                                                                 start + 1);
2191 
2192                 if (section_alignment) {
2193                         start = rounddown(start, SEGS_PER_SEC(sbi));
2194                         end = roundup(end, SEGS_PER_SEC(sbi));
2195                 }
2196 
2197                 for (i = start; i < end; i++) {
2198                         if (test_and_clear_bit(i, prefree_map))
2199                                 dirty_i->nr_dirty[PRE]--;
2200                 }
2201 
2202                 if (!f2fs_realtime_discard_enable(sbi))
2203                         continue;
2204 
2205                 if (force && start >= cpc->trim_start &&
2206                                         (end - 1) <= cpc->trim_end)
2207                         continue;
2208 
2209                 /* Should cover 2MB zoned device for zone-based reset */
2210                 if (!f2fs_sb_has_blkzoned(sbi) &&
2211                     (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
2212                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
2213                                 SEGS_TO_BLKS(sbi, end - start));
2214                         continue;
2215                 }
2216 next:
2217                 secno = GET_SEC_FROM_SEG(sbi, start);
2218                 start_segno = GET_SEG_FROM_SEC(sbi, secno);
2219                 if (!IS_CURSEC(sbi, secno) &&
2220                         !get_valid_blocks(sbi, start, true))
2221                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
2222                                                 BLKS_PER_SEC(sbi));
2223 
2224                 start = start_segno + SEGS_PER_SEC(sbi);
2225                 if (start < end)
2226                         goto next;
2227                 else
2228                         end = start - 1;
2229         }
2230         mutex_unlock(&dirty_i->seglist_lock);
2231 
2232         if (!f2fs_block_unit_discard(sbi))
2233                 goto wakeup;
2234 
2235         /* send small discards */
2236         list_for_each_entry_safe(entry, this, head, list) {
2237                 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2238                 bool is_valid = test_bit_le(0, entry->discard_map);
2239 
2240 find_next:
2241                 if (is_valid) {
2242                         next_pos = find_next_zero_bit_le(entry->discard_map,
2243                                                 BLKS_PER_SEG(sbi), cur_pos);
2244                         len = next_pos - cur_pos;
2245 
2246                         if (f2fs_sb_has_blkzoned(sbi) ||
2247                             (force && len < cpc->trim_minlen))
2248                                 goto skip;
2249 
2250                         f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2251                                                                         len);
2252                         total_len += len;
2253                 } else {
2254                         next_pos = find_next_bit_le(entry->discard_map,
2255                                                 BLKS_PER_SEG(sbi), cur_pos);
2256                 }
2257 skip:
2258                 cur_pos = next_pos;
2259                 is_valid = !is_valid;
2260 
2261                 if (cur_pos < BLKS_PER_SEG(sbi))
2262                         goto find_next;
2263 
2264                 release_discard_addr(entry);
2265                 dcc->nr_discards -= total_len;
2266         }
2267 
2268 wakeup:
2269         wake_up_discard_thread(sbi, false);
2270 }
2271 
2272 int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
2273 {
2274         dev_t dev = sbi->sb->s_bdev->bd_dev;
2275         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2276         int err = 0;
2277 
2278         if (f2fs_sb_has_readonly(sbi)) {
2279                 f2fs_info(sbi,
2280                         "Skip starting discard thread for readonly image");
2281                 return 0;
2282         }
2283 
2284         if (!f2fs_realtime_discard_enable(sbi))
2285                 return 0;
2286 
2287         dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2288                                 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2289         if (IS_ERR(dcc->f2fs_issue_discard)) {
2290                 err = PTR_ERR(dcc->f2fs_issue_discard);
2291                 dcc->f2fs_issue_discard = NULL;
2292         }
2293 
2294         return err;
2295 }
2296 
2297 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2298 {
2299         struct discard_cmd_control *dcc;
2300         int err = 0, i;
2301 
2302         if (SM_I(sbi)->dcc_info) {
2303                 dcc = SM_I(sbi)->dcc_info;
2304                 goto init_thread;
2305         }
2306 
2307         dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2308         if (!dcc)
2309                 return -ENOMEM;
2310 
2311         dcc->discard_io_aware_gran = MAX_PLIST_NUM;
2312         dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2313         dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
2314         dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE;
2315         if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
2316                 dcc->discard_granularity = BLKS_PER_SEG(sbi);
2317         else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
2318                 dcc->discard_granularity = BLKS_PER_SEC(sbi);
2319 
2320         INIT_LIST_HEAD(&dcc->entry_list);
2321         for (i = 0; i < MAX_PLIST_NUM; i++)
2322                 INIT_LIST_HEAD(&dcc->pend_list[i]);
2323         INIT_LIST_HEAD(&dcc->wait_list);
2324         INIT_LIST_HEAD(&dcc->fstrim_list);
2325         mutex_init(&dcc->cmd_lock);
2326         atomic_set(&dcc->issued_discard, 0);
2327         atomic_set(&dcc->queued_discard, 0);
2328         atomic_set(&dcc->discard_cmd_cnt, 0);
2329         dcc->nr_discards = 0;
2330         dcc->max_discards = SEGS_TO_BLKS(sbi, MAIN_SEGS(sbi));
2331         dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
2332         dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
2333         dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
2334         dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
2335         dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL;
2336         dcc->undiscard_blks = 0;
2337         dcc->next_pos = 0;
2338         dcc->root = RB_ROOT_CACHED;
2339         dcc->rbtree_check = false;
2340 
2341         init_waitqueue_head(&dcc->discard_wait_queue);
2342         SM_I(sbi)->dcc_info = dcc;
2343 init_thread:
2344         err = f2fs_start_discard_thread(sbi);
2345         if (err) {
2346                 kfree(dcc);
2347                 SM_I(sbi)->dcc_info = NULL;
2348         }
2349 
2350         return err;
2351 }
2352 
2353 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2354 {
2355         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2356 
2357         if (!dcc)
2358                 return;
2359 
2360         f2fs_stop_discard_thread(sbi);
2361 
2362         /*
2363          * Recovery can cache discard commands, so the error path of
2364          * fill_super() needs a chance to handle them here.
2365          */
2366         f2fs_issue_discard_timeout(sbi);
2367 
2368         kfree(dcc);
2369         SM_I(sbi)->dcc_info = NULL;
2370 }
2371 
2372 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2373 {
2374         struct sit_info *sit_i = SIT_I(sbi);
2375 
2376         if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2377                 sit_i->dirty_sentries++;
2378                 return false;
2379         }
2380 
2381         return true;
2382 }
2383 
2384 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2385                                         unsigned int segno, int modified)
2386 {
2387         struct seg_entry *se = get_seg_entry(sbi, segno);
2388 
2389         se->type = type;
2390         if (modified)
2391                 __mark_sit_entry_dirty(sbi, segno);
2392 }
2393 
2394 static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
2395                                                                 block_t blkaddr)
2396 {
2397         unsigned int segno = GET_SEGNO(sbi, blkaddr);
2398 
2399         if (segno == NULL_SEGNO)
2400                 return 0;
2401         return get_seg_entry(sbi, segno)->mtime;
2402 }
2403 
2404 static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
2405                                                 unsigned long long old_mtime)
2406 {
2407         struct seg_entry *se;
2408         unsigned int segno = GET_SEGNO(sbi, blkaddr);
2409         unsigned long long ctime = get_mtime(sbi, false);
2410         unsigned long long mtime = old_mtime ? old_mtime : ctime;
2411 
2412         if (segno == NULL_SEGNO)
2413                 return;
2414 
2415         se = get_seg_entry(sbi, segno);
2416 
2417         if (!se->mtime)
2418                 se->mtime = mtime;
2419         else
2420                 se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
2421                                                 se->valid_blocks + 1);
2422 
2423         if (ctime > SIT_I(sbi)->max_mtime)
2424                 SIT_I(sbi)->max_mtime = ctime;
2425 }
2426 
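/*
 * Apply the valid-block delta @del for @blkaddr to the in-memory SIT
 * entry: update the valid bitmaps (with mirror checks under
 * CONFIG_F2FS_CHECK_FS), the discard map, the checkpoint-valid
 * accounting and, for large sections, the per-section counters.
 */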
2427 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2428 {
2429         struct seg_entry *se;
2430         unsigned int segno, offset;
2431         long int new_vblocks;
2432         bool exist;
2433 #ifdef CONFIG_F2FS_CHECK_FS
2434         bool mir_exist;
2435 #endif
2436 
2437         segno = GET_SEGNO(sbi, blkaddr);
2438         if (segno == NULL_SEGNO)
2439                 return;
2440 
2441         se = get_seg_entry(sbi, segno);
2442         new_vblocks = se->valid_blocks + del;
2443         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2444 
2445         f2fs_bug_on(sbi, (new_vblocks < 0 ||
2446                         (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
2447 
2448         se->valid_blocks = new_vblocks;
2449 
2450         /* Update valid block bitmap */
2451         if (del > 0) {
2452                 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2453 #ifdef CONFIG_F2FS_CHECK_FS
2454                 mir_exist = f2fs_test_and_set_bit(offset,
2455                                                 se->cur_valid_map_mir);
2456                 if (unlikely(exist != mir_exist)) {
2457                         f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
2458                                  blkaddr, exist);
2459                         f2fs_bug_on(sbi, 1);
2460                 }
2461 #endif
2462                 if (unlikely(exist)) {
2463                         f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
2464                                  blkaddr);
2465                         f2fs_bug_on(sbi, 1);
2466                         se->valid_blocks--;
2467                         del = 0;
2468                 }
2469 
2470                 if (f2fs_block_unit_discard(sbi) &&
2471                                 !f2fs_test_and_set_bit(offset, se->discard_map))
2472                         sbi->discard_blks--;
2473 
2474                 /*
2475                  * SSR should never reuse a block which is checkpointed
2476                  * or newly invalidated.
2477                  */
2478                 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2479                         if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2480                                 se->ckpt_valid_blocks++;
2481                 }
2482         } else {
2483                 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2484 #ifdef CONFIG_F2FS_CHECK_FS
2485                 mir_exist = f2fs_test_and_clear_bit(offset,
2486                                                 se->cur_valid_map_mir);
2487                 if (unlikely(exist != mir_exist)) {
2488                         f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
2489                                  blkaddr, exist);
2490                         f2fs_bug_on(sbi, 1);
2491                 }
2492 #endif
2493                 if (unlikely(!exist)) {
2494                         f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
2495                                  blkaddr);
2496                         f2fs_bug_on(sbi, 1);
2497                         se->valid_blocks++;
2498                         del = 0;
2499                 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2500                         /*
2501                          * If checkpoints are off, we must not reuse data that
2502                          * was used in the previous checkpoint. If it was used
2503                          * before, we must track that to know how much space we
2504                          * really have.
2505                          */
2506                         if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2507                                 spin_lock(&sbi->stat_lock);
2508                                 sbi->unusable_block_count++;
2509                                 spin_unlock(&sbi->stat_lock);
2510                         }
2511                 }
2512 
2513                 if (f2fs_block_unit_discard(sbi) &&
2514                         f2fs_test_and_clear_bit(offset, se->discard_map))
2515                         sbi->discard_blks++;
2516         }
2517         if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2518                 se->ckpt_valid_blocks += del;
2519 
2520         __mark_sit_entry_dirty(sbi, segno);
2521 
2522         /* update total number of valid blocks to be written in ckpt area */
2523         SIT_I(sbi)->written_valid_blocks += del;
2524 
2525         if (__is_large_section(sbi))
2526                 get_sec_entry(sbi, segno)->valid_blocks += del;
2527 }
2528 
2529 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2530 {
2531         unsigned int segno = GET_SEGNO(sbi, addr);
2532         struct sit_info *sit_i = SIT_I(sbi);
2533 
2534         f2fs_bug_on(sbi, addr == NULL_ADDR);
2535         if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
2536                 return;
2537 
2538         f2fs_invalidate_internal_cache(sbi, addr);
2539 
2540         /* add it into sit main buffer */
2541         down_write(&sit_i->sentry_lock);
2542 
2543         update_segment_mtime(sbi, addr, 0);
2544         update_sit_entry(sbi, addr, -1);
2545 
2546         /* add it into dirty seglist */
2547         locate_dirty_segment(sbi, segno);
2548 
2549         up_write(&sit_i->sentry_lock);
2550 }
2551 
2552 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2553 {
2554         struct sit_info *sit_i = SIT_I(sbi);
2555         unsigned int segno, offset;
2556         struct seg_entry *se;
2557         bool is_cp = false;
2558 
2559         if (!__is_valid_data_blkaddr(blkaddr))
2560                 return true;
2561 
2562         down_read(&sit_i->sentry_lock);
2563 
2564         segno = GET_SEGNO(sbi, blkaddr);
2565         se = get_seg_entry(sbi, segno);
2566         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2567 
2568         if (f2fs_test_bit(offset, se->ckpt_valid_map))
2569                 is_cp = true;
2570 
2571         up_read(&sit_i->sentry_lock);
2572 
2573         return is_cp;
2574 }
2575 
2576 static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int type)
2577 {
2578         struct curseg_info *curseg = CURSEG_I(sbi, type);
2579 
2580         if (sbi->ckpt->alloc_type[type] == SSR)
2581                 return BLKS_PER_SEG(sbi);
2582         return curseg->next_blkoff;
2583 }
2584 
2585 /*
2586  * Calculate the number of current summary pages for writing
2587  */
2588 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2589 {
2590         int valid_sum_count = 0;
2591         int i, sum_in_page;
2592 
2593         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2594                 if (sbi->ckpt->alloc_type[i] != SSR && for_ra)
2595                         valid_sum_count +=
2596                                 le16_to_cpu(F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2597                 else
2598                         valid_sum_count += f2fs_curseg_valid_blocks(sbi, i);
2599         }
2600 
2601         sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2602                         SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2603         if (valid_sum_count <= sum_in_page)
2604                 return 1;
2605         else if ((valid_sum_count - sum_in_page) <=
2606                 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2607                 return 2;
2608         return 3;
2609 }
2610 
2611 /*
2612  * Caller should put this summary page
2613  */
2614 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2615 {
2616         if (unlikely(f2fs_cp_error(sbi)))
2617                 return ERR_PTR(-EIO);
2618         return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
2619 }
2620 
2621 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2622                                         void *src, block_t blk_addr)
2623 {
2624         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2625 
2626         memcpy(page_address(page), src, PAGE_SIZE);
2627         set_page_dirty(page);
2628         f2fs_put_page(page, 1);
2629 }
2630 
2631 static void write_sum_page(struct f2fs_sb_info *sbi,
2632                         struct f2fs_summary_block *sum_blk, block_t blk_addr)
2633 {
2634         f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2635 }
2636 
2637 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2638                                                 int type, block_t blk_addr)
2639 {
2640         struct curseg_info *curseg = CURSEG_I(sbi, type);
2641         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2642         struct f2fs_summary_block *src = curseg->sum_blk;
2643         struct f2fs_summary_block *dst;
2644 
2645         dst = (struct f2fs_summary_block *)page_address(page);
2646         memset(dst, 0, PAGE_SIZE);
2647 
2648         mutex_lock(&curseg->curseg_mutex);
2649 
2650         down_read(&curseg->journal_rwsem);
2651         memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2652         up_read(&curseg->journal_rwsem);
2653 
2654         memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2655         memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2656 
2657         mutex_unlock(&curseg->curseg_mutex);
2658 
2659         set_page_dirty(page);
2660         f2fs_put_page(page, 1);
2661 }
2662 
2663 static int is_next_segment_free(struct f2fs_sb_info *sbi,
2664                                 struct curseg_info *curseg)
2665 {
2666         unsigned int segno = curseg->segno + 1;
2667         struct free_segmap_info *free_i = FREE_I(sbi);
2668 
2669         if (segno < MAIN_SEGS(sbi) && segno % SEGS_PER_SEC(sbi))
2670                 return !test_bit(segno, free_i->free_segmap);
2671         return 0;
2672 }
2673 
2674 /*
2675  * Find a new segment in the free segment bitmap, in the right order.
2676  * This function should succeed; otherwise it is treated as a bug.
2677  */
2678 static int get_new_segment(struct f2fs_sb_info *sbi,
2679                         unsigned int *newseg, bool new_sec, bool pinning)
2680 {
2681         struct free_segmap_info *free_i = FREE_I(sbi);
2682         unsigned int segno, secno, zoneno;
2683         unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2684         unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2685         unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2686         bool init = true;
2687         int i;
2688         int ret = 0;
2689 
2690         spin_lock(&free_i->segmap_lock);
2691 
2692         if (time_to_inject(sbi, FAULT_NO_SEGMENT)) {
2693                 ret = -ENOSPC;
2694                 goto out_unlock;
2695         }
2696 
2697         if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) {
2698                 segno = find_next_zero_bit(free_i->free_segmap,
2699                         GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2700                 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2701                         goto got_it;
2702         }
2703 
2704 #ifdef CONFIG_BLK_DEV_ZONED
2705         /*
2706          * If f2fs is formatted on zoned storage, try to get pinned sections
2707          * from the beginning of the storage, which should be a conventional zone.
2708          */
2709         if (f2fs_sb_has_blkzoned(sbi)) {
2710                 /* Prioritize writing to conventional zones */
2711                 if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning)
2712                         segno = 0;
2713                 else
2714                         segno = max(first_zoned_segno(sbi), *newseg);
2715                 hint = GET_SEC_FROM_SEG(sbi, segno);
2716         }
2717 #endif
2718 
2719 find_other_zone:
2720         secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2721 
2722 #ifdef CONFIG_BLK_DEV_ZONED
2723         if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) {
2724                 /* Write only to sequential zones */
2725                 if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) {
2726                         hint = GET_SEC_FROM_SEG(sbi, first_zoned_segno(sbi));
2727                         secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2728                 } else
2729                         secno = find_first_zero_bit(free_i->free_secmap,
2730                                                                 MAIN_SECS(sbi));
2731                 if (secno >= MAIN_SECS(sbi)) {
2732                         ret = -ENOSPC;
2733                         f2fs_bug_on(sbi, 1);
2734                         goto out_unlock;
2735                 }
2736         }
2737 #endif
2738 
2739         if (secno >= MAIN_SECS(sbi)) {
2740                 secno = find_first_zero_bit(free_i->free_secmap,
2741                                                         MAIN_SECS(sbi));
2742                 if (secno >= MAIN_SECS(sbi)) {
2743                         ret = -ENOSPC;
2744                         f2fs_bug_on(sbi, 1);
2745                         goto out_unlock;
2746                 }
2747         }
2748         segno = GET_SEG_FROM_SEC(sbi, secno);
2749         zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2750 
2751         /* give up on finding another zone */
2752         if (!init)
2753                 goto got_it;
2754         if (sbi->secs_per_zone == 1)
2755                 goto got_it;
2756         if (zoneno == old_zoneno)
2757                 goto got_it;
2758         for (i = 0; i < NR_CURSEG_TYPE; i++)
2759                 if (CURSEG_I(sbi, i)->zone == zoneno)
2760                         break;
2761 
2762         if (i < NR_CURSEG_TYPE) {
2763                 /* zone is in use, try another */
2764                 if (zoneno + 1 >= total_zones)
2765                         hint = 0;
2766                 else
2767                         hint = (zoneno + 1) * sbi->secs_per_zone;
2768                 init = false;
2769                 goto find_other_zone;
2770         }
2771 got_it:
2772         /* set it as dirty segment in free segmap */
2773         f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2774 
2775         /* no free section in conventional zone */
2776         if (new_sec && pinning &&
2777                 !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
2778                 ret = -EAGAIN;
2779                 goto out_unlock;
2780         }
2781         __set_inuse(sbi, segno);
2782         *newseg = segno;
2783 out_unlock:
2784         spin_unlock(&free_i->segmap_lock);
2785 
2786         if (ret == -ENOSPC)
2787                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT);
2788         return ret;
2789 }
2790 
2791 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2792 {
2793         struct curseg_info *curseg = CURSEG_I(sbi, type);
2794         struct summary_footer *sum_footer;
2795         unsigned short seg_type = curseg->seg_type;
2796 
2797         /* only happens when get_new_segment() fails */
2798         if (curseg->next_segno == NULL_SEGNO)
2799                 return;
2800 
2801         curseg->inited = true;
2802         curseg->segno = curseg->next_segno;
2803         curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2804         curseg->next_blkoff = 0;
2805         curseg->next_segno = NULL_SEGNO;
2806 
2807         sum_footer = &(curseg->sum_blk->footer);
2808         memset(sum_footer, 0, sizeof(struct summary_footer));
2809 
2810         sanity_check_seg_type(sbi, seg_type);
2811 
2812         if (IS_DATASEG(seg_type))
2813                 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2814         if (IS_NODESEG(seg_type))
2815                 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2816         __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
2817 }
2818 
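/*
 * Pick the segment number from which get_new_segment() starts searching,
 * based on the allocation mode, the fragment allocation modes and the GC
 * victim hint.
 */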
2819 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2820 {
2821         struct curseg_info *curseg = CURSEG_I(sbi, type);
2822         unsigned short seg_type = curseg->seg_type;
2823 
2824         sanity_check_seg_type(sbi, seg_type);
2825         if (__is_large_section(sbi)) {
2826                 if (f2fs_need_rand_seg(sbi)) {
2827                         unsigned int hint = GET_SEC_FROM_SEG(sbi, curseg->segno);
2828 
2829                         if (GET_SEC_FROM_SEG(sbi, curseg->segno + 1) != hint)
2830                                 return curseg->segno;
2831                         return get_random_u32_inclusive(curseg->segno + 1,
2832                                         GET_SEG_FROM_SEC(sbi, hint + 1) - 1);
2833                 }
2834                 return curseg->segno;
2835         } else if (f2fs_need_rand_seg(sbi)) {
2836                 return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
2837         }
2838 
2839         /* the inmem log may not be located on any segment after mount */
2840         if (!curseg->inited)
2841                 return 0;
2842 
2843         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2844                 return 0;
2845 
2846         if (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type))
2847                 return 0;
2848 
2849         if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2850                 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2851 
2852         /* find segments from 0 to reuse freed segments */
2853         if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2854                 return 0;
2855 
2856         return curseg->segno;
2857 }
2858 
2859 /*
2860  * Allocate a current working segment.
2861  * This function always allocates a free segment in LFS manner.
2862  */
2863 static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2864 {
2865         struct curseg_info *curseg = CURSEG_I(sbi, type);
2866         unsigned int segno = curseg->segno;
2867         bool pinning = type == CURSEG_COLD_DATA_PINNED;
2868         int ret;
2869 
2870         if (curseg->inited)
2871                 write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
2872 
2873         segno = __get_next_segno(sbi, type);
2874         ret = get_new_segment(sbi, &segno, new_sec, pinning);
2875         if (ret) {
2876                 if (ret == -ENOSPC)
2877                         curseg->segno = NULL_SEGNO;
2878                 return ret;
2879         }
2880 
2881         curseg->next_segno = segno;
2882         reset_curseg(sbi, type, 1);
2883         curseg->alloc_type = LFS;
2884         if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
2885                 curseg->fragment_remained_chunk =
2886                                 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
2887         return 0;
2888 }
2889 
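/*
 * Find the first free block offset in @segno at or after @start by OR-ing
 * the checkpointed and current validity bitmaps: a slot is reusable for SSR
 * only if the block is invalid in both, so data still referenced by the last
 * checkpoint is never overwritten.
 */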
2890 static int __next_free_blkoff(struct f2fs_sb_info *sbi,
2891                                         int segno, block_t start)
2892 {
2893         struct seg_entry *se = get_seg_entry(sbi, segno);
2894         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2895         unsigned long *target_map = SIT_I(sbi)->tmp_map;
2896         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2897         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2898         int i;
2899 
2900         for (i = 0; i < entries; i++)
2901                 target_map[i] = ckpt_map[i] | cur_map[i];
2902 
2903         return __find_rev_next_zero_bit(target_map, BLKS_PER_SEG(sbi), start);
2904 }
2905 
2906 static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
2907                 struct curseg_info *seg)
2908 {
2909         return __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1);
2910 }
2911 
2912 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
2913 {
2914         return __next_free_blkoff(sbi, segno, 0) < BLKS_PER_SEG(sbi);
2915 }
2916 
2917 /*
2918  * This function always allocates a used segment (from the dirty seglist) in SSR
2919  * manner, so it should recover the existing segment information of valid blocks.
2920  */
2921 static int change_curseg(struct f2fs_sb_info *sbi, int type)
2922 {
2923         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2924         struct curseg_info *curseg = CURSEG_I(sbi, type);
2925         unsigned int new_segno = curseg->next_segno;
2926         struct f2fs_summary_block *sum_node;
2927         struct page *sum_page;
2928 
2929         write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno));
2930 
2931         __set_test_and_inuse(sbi, new_segno);
2932 
2933         mutex_lock(&dirty_i->seglist_lock);
2934         __remove_dirty_segment(sbi, new_segno, PRE);
2935         __remove_dirty_segment(sbi, new_segno, DIRTY);
2936         mutex_unlock(&dirty_i->seglist_lock);
2937 
2938         reset_curseg(sbi, type, 1);
2939         curseg->alloc_type = SSR;
2940         curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
2941 
2942         sum_page = f2fs_get_sum_page(sbi, new_segno);
2943         if (IS_ERR(sum_page)) {
2944                 /* GC won't be able to use stale summary pages due to cp_error */
2945                 memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
2946                 return PTR_ERR(sum_page);
2947         }
2948         sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2949         memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2950         f2fs_put_page(sum_page, 1);
2951         return 0;
2952 }
2953 
2954 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2955                                 int alloc_mode, unsigned long long age);
2956 
2957 static int get_atssr_segment(struct f2fs_sb_info *sbi, int type,
2958                                         int target_type, int alloc_mode,
2959                                         unsigned long long age)
2960 {
2961         struct curseg_info *curseg = CURSEG_I(sbi, type);
2962         int ret = 0;
2963 
2964         curseg->seg_type = target_type;
2965 
2966         if (get_ssr_segment(sbi, type, alloc_mode, age)) {
2967                 struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
2968 
2969                 curseg->seg_type = se->type;
2970                 ret = change_curseg(sbi, type);
2971         } else {
2972                 /* allocate cold segment by default */
2973                 curseg->seg_type = CURSEG_COLD_DATA;
2974                 ret = new_curseg(sbi, type, true);
2975         }
2976         stat_inc_seg_type(sbi, curseg);
2977         return ret;
2978 }
2979 
2980 static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi, bool force)
2981 {
2982         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
2983         int ret = 0;
2984 
2985         if (!sbi->am.atgc_enabled && !force)
2986                 return 0;
2987 
2988         f2fs_down_read(&SM_I(sbi)->curseg_lock);
2989 
2990         mutex_lock(&curseg->curseg_mutex);
2991         down_write(&SIT_I(sbi)->sentry_lock);
2992 
2993         ret = get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC,
2994                                         CURSEG_COLD_DATA, SSR, 0);
2995 
2996         up_write(&SIT_I(sbi)->sentry_lock);
2997         mutex_unlock(&curseg->curseg_mutex);
2998 
2999         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3000         return ret;
3001 }
3002 
3003 int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
3004 {
3005         return __f2fs_init_atgc_curseg(sbi, false);
3006 }
3007 
3008 int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi)
3009 {
3010         int ret;
3011 
3012         if (!test_opt(sbi, ATGC))
3013                 return 0;
3014         if (sbi->am.atgc_enabled)
3015                 return 0;
3016         if (le64_to_cpu(F2FS_CKPT(sbi)->elapsed_time) <
3017                         sbi->am.age_threshold)
3018                 return 0;
3019 
3020         ret = __f2fs_init_atgc_curseg(sbi, true);
3021         if (!ret) {
3022                 sbi->am.atgc_enabled = true;
3023                 f2fs_info(sbi, "reenabled age threshold GC");
3024         }
3025         return ret;
3026 }
3027 
3028 static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
3029 {
3030         struct curseg_info *curseg = CURSEG_I(sbi, type);
3031 
3032         mutex_lock(&curseg->curseg_mutex);
3033         if (!curseg->inited)
3034                 goto out;
3035 
3036         if (get_valid_blocks(sbi, curseg->segno, false)) {
3037                 write_sum_page(sbi, curseg->sum_blk,
3038                                 GET_SUM_BLOCK(sbi, curseg->segno));
3039         } else {
3040                 mutex_lock(&DIRTY_I(sbi)->seglist_lock);
3041                 __set_test_and_free(sbi, curseg->segno, true);
3042                 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
3043         }
3044 out:
3045         mutex_unlock(&curseg->curseg_mutex);
3046 }
3047 
3048 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
3049 {
3050         __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
3051 
3052         if (sbi->am.atgc_enabled)
3053                 __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
3054 }
3055 
3056 static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
3057 {
3058         struct curseg_info *curseg = CURSEG_I(sbi, type);
3059 
3060         mutex_lock(&curseg->curseg_mutex);
3061         if (!curseg->inited)
3062                 goto out;
3063         if (get_valid_blocks(sbi, curseg->segno, false))
3064                 goto out;
3065 
3066         mutex_lock(&DIRTY_I(sbi)->seglist_lock);
3067         __set_test_and_inuse(sbi, curseg->segno);
3068         mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
3069 out:
3070         mutex_unlock(&curseg->curseg_mutex);
3071 }
3072 
3073 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
3074 {
3075         __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
3076 
3077         if (sbi->am.atgc_enabled)
3078                 __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
3079 }
3080 
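/*
 * Pick a partially valid segment to reuse in SSR manner. Try a victim of the
 * log's own temperature first, then scan the other node/data temperatures
 * (cold to hot for warm/cold logs, hot to cold otherwise). When checkpointing
 * is disabled, a completely free segment from the dirty list is also
 * acceptable. Returns 1 and stores the victim in curseg->next_segno on
 * success, 0 otherwise.
 */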
3081 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
3082                                 int alloc_mode, unsigned long long age)
3083 {
3084         struct curseg_info *curseg = CURSEG_I(sbi, type);
3085         unsigned segno = NULL_SEGNO;
3086         unsigned short seg_type = curseg->seg_type;
3087         int i, cnt;
3088         bool reversed = false;
3089 
3090         sanity_check_seg_type(sbi, seg_type);
3091 
3092         /* f2fs_need_SSR() already forces us to do this */
3093         if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
3094                 curseg->next_segno = segno;
3095                 return 1;
3096         }
3097 
3098         /* For node segments, let's do SSR more intensively */
3099         if (IS_NODESEG(seg_type)) {
3100                 if (seg_type >= CURSEG_WARM_NODE) {
3101                         reversed = true;
3102                         i = CURSEG_COLD_NODE;
3103                 } else {
3104                         i = CURSEG_HOT_NODE;
3105                 }
3106                 cnt = NR_CURSEG_NODE_TYPE;
3107         } else {
3108                 if (seg_type >= CURSEG_WARM_DATA) {
3109                         reversed = true;
3110                         i = CURSEG_COLD_DATA;
3111                 } else {
3112                         i = CURSEG_HOT_DATA;
3113                 }
3114                 cnt = NR_CURSEG_DATA_TYPE;
3115         }
3116 
3117         for (; cnt-- > 0; reversed ? i-- : i++) {
3118                 if (i == seg_type)
3119                         continue;
3120                 if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
3121                         curseg->next_segno = segno;
3122                         return 1;
3123                 }
3124         }
3125 
3126         /* find valid_blocks=0 in dirty list */
3127         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
3128                 segno = get_free_segment(sbi);
3129                 if (segno != NULL_SEGNO) {
3130                         curseg->next_segno = segno;
3131                         return 1;
3132                 }
3133         }
3134         return 0;
3135 }
3136 
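/*
 * Decide whether this log should open a brand-new segment instead of reusing
 * one via SSR: a fresh segment is needed for the warm node log without CRC
 * recovery, when an LFS-allocated log's next segment is already free (and
 * checkpointing is enabled), or when no SSR candidate can be found.
 */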
3137 static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
3138 {
3139         struct curseg_info *curseg = CURSEG_I(sbi, type);
3140 
3141         if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
3142             curseg->seg_type == CURSEG_WARM_NODE)
3143                 return true;
3144         if (curseg->alloc_type == LFS && is_next_segment_free(sbi, curseg) &&
3145             likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3146                 return true;
3147         if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0))
3148                 return true;
3149         return false;
3150 }
3151 
3152 int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
3153                                         unsigned int start, unsigned int end)
3154 {
3155         struct curseg_info *curseg = CURSEG_I(sbi, type);
3156         unsigned int segno;
3157         int ret = 0;
3158 
3159         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3160         mutex_lock(&curseg->curseg_mutex);
3161         down_write(&SIT_I(sbi)->sentry_lock);
3162 
3163         segno = CURSEG_I(sbi, type)->segno;
3164         if (segno < start || segno > end)
3165                 goto unlock;
3166 
3167         if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
3168                 ret = change_curseg(sbi, type);
3169         else
3170                 ret = new_curseg(sbi, type, true);
3171 
3172         stat_inc_seg_type(sbi, curseg);
3173 
3174         locate_dirty_segment(sbi, segno);
3175 unlock:
3176         up_write(&SIT_I(sbi)->sentry_lock);
3177 
3178         if (segno != curseg->segno)
3179                 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
3180                             type, segno, curseg->segno);
3181 
3182         mutex_unlock(&curseg->curseg_mutex);
3183         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3184         return ret;
3185 }
3186 
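/*
 * Move the given log onto a new segment. Unless @force is set, a log that is
 * already sitting at the start of a completely empty segment (no valid blocks
 * now and none recorded at the last checkpoint) is left in place so a free
 * segment is not wasted; an uninitialized pinned log always allocates.
 */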
3187 static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
3188                                                 bool new_sec, bool force)
3189 {
3190         struct curseg_info *curseg = CURSEG_I(sbi, type);
3191         unsigned int old_segno;
3192         int err = 0;
3193 
3194         if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited)
3195                 goto allocate;
3196 
3197         if (!force && curseg->inited &&
3198             !curseg->next_blkoff &&
3199             !get_valid_blocks(sbi, curseg->segno, new_sec) &&
3200             !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
3201                 return 0;
3202 
3203 allocate:
3204         old_segno = curseg->segno;
3205         err = new_curseg(sbi, type, true);
3206         if (err)
3207                 return err;
3208         stat_inc_seg_type(sbi, curseg);
3209         locate_dirty_segment(sbi, old_segno);
3210         return 0;
3211 }
3212 
3213 int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
3214 {
3215         int ret;
3216 
3217         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3218         down_write(&SIT_I(sbi)->sentry_lock);
3219         ret = __allocate_new_segment(sbi, type, true, force);
3220         up_write(&SIT_I(sbi)->sentry_lock);
3221         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3222 
3223         return ret;
3224 }
3225 
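/*
 * Open a section for pinned (non-relocatable) data. On zoned devices,
 * -EAGAIN means no free section lies in the conventional area, so one round
 * of garbage collection is run over the first device's range and the
 * allocation is retried once.
 */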
3226 int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
3227 {
3228         int err;
3229         bool gc_required = true;
3230 
3231 retry:
3232         f2fs_lock_op(sbi);
3233         err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
3234         f2fs_unlock_op(sbi);
3235 
3236         if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) {
3237                 f2fs_down_write(&sbi->gc_lock);
3238                 err = f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
3239                 f2fs_up_write(&sbi->gc_lock);
3240 
3241                 gc_required = false;
3242                 if (!err)
3243                         goto retry;
3244         }
3245 
3246         return err;
3247 }
3248 
3249 int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
3250 {
3251         int i;
3252         int err = 0;
3253 
3254         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3255         down_write(&SIT_I(sbi)->sentry_lock);
3256         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
3257                 err += __allocate_new_segment(sbi, i, false, false);
3258         up_write(&SIT_I(sbi)->sentry_lock);
3259         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3260 
3261         return err;
3262 }
3263 
3264 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
3265                                                 struct cp_control *cpc)
3266 {
3267         __u64 trim_start = cpc->trim_start;
3268         bool has_candidate = false;
3269 
3270         down_write(&SIT_I(sbi)->sentry_lock);
3271         for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
3272                 if (add_discard_addrs(sbi, cpc, true)) {
3273                         has_candidate = true;
3274                         break;
3275                 }
3276         }
3277         up_write(&SIT_I(sbi)->sentry_lock);
3278 
3279         cpc->trim_start = trim_start;
3280         return has_candidate;
3281 }
3282 
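/*
 * Issue prepared discard commands whose logical range overlaps
 * [@start, @end]. Commands shorter than the policy granularity are skipped;
 * after dpolicy->max_requests submissions the lock is dropped, all in-flight
 * discards are waited for, and scanning resumes where it stopped. Returns
 * the number of blocks whose discard completed while waiting.
 */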
3283 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
3284                                         struct discard_policy *dpolicy,
3285                                         unsigned int start, unsigned int end)
3286 {
3287         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
3288         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
3289         struct rb_node **insert_p = NULL, *insert_parent = NULL;
3290         struct discard_cmd *dc;
3291         struct blk_plug plug;
3292         int issued;
3293         unsigned int trimmed = 0;
3294 
3295 next:
3296         issued = 0;
3297 
3298         mutex_lock(&dcc->cmd_lock);
3299         if (unlikely(dcc->rbtree_check))
3300                 f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
3301 
3302         dc = __lookup_discard_cmd_ret(&dcc->root, start,
3303                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
3304         if (!dc)
3305                 dc = next_dc;
3306 
3307         blk_start_plug(&plug);
3308 
3309         while (dc && dc->di.lstart <= end) {
3310                 struct rb_node *node;
3311                 int err = 0;
3312 
3313                 if (dc->di.len < dpolicy->granularity)
3314                         goto skip;
3315 
3316                 if (dc->state != D_PREP) {
3317                         list_move_tail(&dc->list, &dcc->fstrim_list);
3318                         goto skip;
3319                 }
3320 
3321                 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
3322 
3323                 if (issued >= dpolicy->max_requests) {
3324                         start = dc->di.lstart + dc->di.len;
3325 
3326                         if (err)
3327                                 __remove_discard_cmd(sbi, dc);
3328 
3329                         blk_finish_plug(&plug);
3330                         mutex_unlock(&dcc->cmd_lock);
3331                         trimmed += __wait_all_discard_cmd(sbi, NULL);
3332                         f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
3333                         goto next;
3334                 }
3335 skip:
3336                 node = rb_next(&dc->rb_node);
3337                 if (err)
3338                         __remove_discard_cmd(sbi, dc);
3339                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
3340 
3341                 if (fatal_signal_pending(current))
3342                         break;
3343         }
3344 
3345         blk_finish_plug(&plug);
3346         mutex_unlock(&dcc->cmd_lock);
3347 
3348         return trimmed;
3349 }
3350 
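/*
 * FITRIM entry point: validate the byte range, convert it to segment numbers
 * (section-aligned when required), write a CP_DISCARD checkpoint to collect
 * discard candidates, and, unless runtime discard is enabled, synchronously
 * issue and wait for discards over the block range. As a rough worked example
 * (assuming the default 4KiB block and 2MiB segment sizes), a 1GiB range
 * covers 262144 blocks, i.e. 512 segments. range->len is updated to the
 * number of bytes actually trimmed.
 */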
3351 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
3352 {
3353         __u64 start = F2FS_BYTES_TO_BLK(range->start);
3354         __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
3355         unsigned int start_segno, end_segno;
3356         block_t start_block, end_block;
3357         struct cp_control cpc;
3358         struct discard_policy dpolicy;
3359         unsigned long long trimmed = 0;
3360         int err = 0;
3361         bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
3362 
3363         if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
3364                 return -EINVAL;
3365 
3366         if (end < MAIN_BLKADDR(sbi))
3367                 goto out;
3368 
3369         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
3370                 f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
3371                 return -EFSCORRUPTED;
3372         }
3373 
3374         /* start/end segment number in main_area */
3375         start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
3376         end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
3377                                                 GET_SEGNO(sbi, end);
3378         if (need_align) {
3379                 start_segno = rounddown(start_segno, SEGS_PER_SEC(sbi));
3380                 end_segno = roundup(end_segno + 1, SEGS_PER_SEC(sbi)) - 1;
3381         }
3382 
3383         cpc.reason = CP_DISCARD;
3384         cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
3385         cpc.trim_start = start_segno;
3386         cpc.trim_end = end_segno;
3387 
3388         if (sbi->discard_blks == 0)
3389                 goto out;
3390 
3391         f2fs_down_write(&sbi->gc_lock);
3392         stat_inc_cp_call_count(sbi, TOTAL_CALL);
3393         err = f2fs_write_checkpoint(sbi, &cpc);
3394         f2fs_up_write(&sbi->gc_lock);
3395         if (err)
3396                 goto out;
3397 
3398         /*
3399          * We filed discard candidates, but we don't actually need to wait for
3400          * all of them, since they'll be issued at idle time along with the
3401          * runtime discard option. The user configuration indicates that
3402          * runtime discard or periodic fstrim is used instead.
3403          */
3404         if (f2fs_realtime_discard_enable(sbi))
3405                 goto out;
3406 
3407         start_block = START_BLOCK(sbi, start_segno);
3408         end_block = START_BLOCK(sbi, end_segno + 1);
3409 
3410         __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
3411         trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
3412                                         start_block, end_block);
3413 
3414         trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
3415                                         start_block, end_block);
3416 out:
3417         if (!err)
3418                 range->len = F2FS_BLK_TO_BYTES(trimmed);
3419         return err;
3420 }
3421 
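/*
 * Map a userspace write hint to a data log. With two active logs all data is
 * hot; with four it is cold; with the full six logs WRITE_LIFE_SHORT maps to
 * hot data, WRITE_LIFE_EXTREME to cold data, and anything else to warm data.
 * For example, a file given WRITE_LIFE_EXTREME via fcntl(F_SET_RW_HINT) lands
 * in the cold data log on a six-log configuration.
 */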
3422 int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint)
3423 {
3424         if (F2FS_OPTION(sbi).active_logs == 2)
3425                 return CURSEG_HOT_DATA;
3426         else if (F2FS_OPTION(sbi).active_logs == 4)
3427                 return CURSEG_COLD_DATA;
3428 
3429         /* active_logs == 6 */
3430         switch (hint) {
3431         case WRITE_LIFE_SHORT:
3432                 return CURSEG_HOT_DATA;
3433         case WRITE_LIFE_EXTREME:
3434                 return CURSEG_COLD_DATA;
3435         default:
3436                 return CURSEG_WARM_DATA;
3437         }
3438 }
3439 
3440 /*
3441  * This returns write hints for each segment type. These hints will be
3442  * passed down to the block layer as below by default.
3443  *
3444  * User                  F2FS                     Block
3445  * ----                  ----                     -----
3446  *                       META                     WRITE_LIFE_NONE|REQ_META
3447  *                       HOT_NODE                 WRITE_LIFE_NONE
3448  *                       WARM_NODE                WRITE_LIFE_MEDIUM
3449  *                       COLD_NODE                WRITE_LIFE_LONG
3450  * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
3451  * extension list        "                        "
3452  *
3453  * -- buffered io
3454  *                       COLD_DATA                WRITE_LIFE_EXTREME
3455  *                       HOT_DATA                 WRITE_LIFE_SHORT
3456  *                       WARM_DATA                WRITE_LIFE_NOT_SET
3457  *
3458  * -- direct io
3459  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
3460  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
3461  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
3462  * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
3463  * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
3464  * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
3465  */
3466 enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
3467                                 enum page_type type, enum temp_type temp)
3468 {
3469         switch (type) {
3470         case DATA:
3471                 switch (temp) {
3472                 case WARM:
3473                         return WRITE_LIFE_NOT_SET;
3474                 case HOT:
3475                         return WRITE_LIFE_SHORT;
3476                 case COLD:
3477                         return WRITE_LIFE_EXTREME;
3478                 default:
3479                         return WRITE_LIFE_NONE;
3480                 }
3481         case NODE:
3482                 switch (temp) {
3483                 case WARM:
3484                         return WRITE_LIFE_MEDIUM;
3485                 case HOT:
3486                         return WRITE_LIFE_NONE;
3487                 case COLD:
3488                         return WRITE_LIFE_LONG;
3489                 default:
3490                         return WRITE_LIFE_NONE;
3491                 }
3492         case META:
3493                 return WRITE_LIFE_NONE;
3494         default:
3495                 return WRITE_LIFE_NONE;
3496         }
3497 }
3498 
3499 static int __get_segment_type_2(struct f2fs_io_info *fio)
3500 {
3501         if (fio->type == DATA)
3502                 return CURSEG_HOT_DATA;
3503         else
3504                 return CURSEG_HOT_NODE;
3505 }
3506 
3507 static int __get_segment_type_4(struct f2fs_io_info *fio)
3508 {
3509         if (fio->type == DATA) {
3510                 struct inode *inode = fio->page->mapping->host;
3511 
3512                 if (S_ISDIR(inode->i_mode))
3513                         return CURSEG_HOT_DATA;
3514                 else
3515                         return CURSEG_COLD_DATA;
3516         } else {
3517                 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3518                         return CURSEG_WARM_NODE;
3519                 else
3520                         return CURSEG_COLD_NODE;
3521         }
3522 }
3523 
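/*
 * Classify a data write by the age recorded in the inode's age extent cache:
 * ages at or below hot_data_age_threshold go to the hot log, those at or
 * below warm_data_age_threshold to the warm log, older ones to the cold log.
 * Returns NO_CHECK_TYPE when no aged extent covers @pgofs, so the caller
 * falls back to other heuristics.
 */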
3524 static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
3525 {
3526         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3527         struct extent_info ei = {};
3528 
3529         if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
3530                 if (!ei.age)
3531                         return NO_CHECK_TYPE;
3532                 if (ei.age <= sbi->hot_data_age_threshold)
3533                         return CURSEG_HOT_DATA;
3534                 if (ei.age <= sbi->warm_data_age_threshold)
3535                         return CURSEG_WARM_DATA;
3536                 return CURSEG_COLD_DATA;
3537         }
3538         return NO_CHECK_TYPE;
3539 }
3540 
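/*
 * Full six-log placement policy. For data pages the checks are ordered:
 * aligned (pinned) writes, pages being rewritten by GC (ATGC when eligible,
 * otherwise cold), cold or compressed files, the age extent classification,
 * hot-file flags and COW files, and finally the inode's write hint. For node
 * pages, direct node blocks go to the warm or hot node log depending on the
 * cold flag; indirect node blocks go to the cold node log.
 */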
3541 static int __get_segment_type_6(struct f2fs_io_info *fio)
3542 {
3543         if (fio->type == DATA) {
3544                 struct inode *inode = fio->page->mapping->host;
3545                 int type;
3546 
3547                 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
3548                         return CURSEG_COLD_DATA_PINNED;
3549 
3550                 if (page_private_gcing(fio->page)) {
3551                         if (fio->sbi->am.atgc_enabled &&
3552                                 (fio->io_type == FS_DATA_IO) &&
3553                                 (fio->sbi->gc_mode != GC_URGENT_HIGH) &&
3554                                 __is_valid_data_blkaddr(fio->old_blkaddr) &&
3555                                 !is_inode_flag_set(inode, FI_OPU_WRITE))
3556                                 return CURSEG_ALL_DATA_ATGC;
3557                         else
3558                                 return CURSEG_COLD_DATA;
3559                 }
3560                 if (file_is_cold(inode) || f2fs_need_compress_data(inode))
3561                         return CURSEG_COLD_DATA;
3562 
3563                 type = __get_age_segment_type(inode, fio->page->index);
3564                 if (type != NO_CHECK_TYPE)
3565                         return type;
3566 
3567                 if (file_is_hot(inode) ||
3568                                 is_inode_flag_set(inode, FI_HOT_DATA) ||
3569                                 f2fs_is_cow_file(inode))
3570                         return CURSEG_HOT_DATA;
3571                 return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
3572                                                 inode->i_write_hint);
3573         } else {
3574                 if (IS_DNODE(fio->page))
3575                         return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3576                                                 CURSEG_HOT_NODE;
3577                 return CURSEG_COLD_NODE;
3578         }
3579 }
3580 
3581 int f2fs_get_segment_temp(int seg_type)
3582 {
3583         if (IS_HOT(seg_type))
3584                 return HOT;
3585         else if (IS_WARM(seg_type))
3586                 return WARM;
3587         return COLD;
3588 }
3589 
3590 static int __get_segment_type(struct f2fs_io_info *fio)
3591 {
3592         int type = 0;
3593 
3594         switch (F2FS_OPTION(fio->sbi).active_logs) {
3595         case 2:
3596                 type = __get_segment_type_2(fio);
3597                 break;
3598         case 4:
3599                 type = __get_segment_type_4(fio);
3600                 break;
3601         case 6:
3602                 type = __get_segment_type_6(fio);
3603                 break;
3604         default:
3605                 f2fs_bug_on(fio->sbi, true);
3606         }
3607 
3608         fio->temp = f2fs_get_segment_temp(type);
3609 
3610         return type;
3611 }
3612 
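/*
 * In FS_MODE_FRAGMENT_BLK test mode, deliberately fragment allocation: after
 * each chunk of fragment_remained_chunk blocks, pick a new random chunk
 * length in [1, max_fragment_chunk] and skip a random hole of up to
 * max_fragment_hole blocks before the next allocation.
 */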
3613 static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
3614                 struct curseg_info *seg)
3615 {
3616         /* To allocate block chunks of different sizes, use a random number */
3617         if (--seg->fragment_remained_chunk > 0)
3618                 return;
3619 
3620         seg->fragment_remained_chunk =
3621                 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
3622         seg->next_blkoff +=
3623                 get_random_u32_inclusive(1, sbi->max_fragment_hole);
3624 }
3625 
3626 static void reset_curseg_fields(struct curseg_info *curseg)
3627 {
3628         curseg->inited = false;
3629         curseg->segno = NULL_SEGNO;
3630         curseg->next_segno = 0;
3631 }
3632 
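/*
 * Core block allocator. Locking order: SM curseg_lock (read) ->
 * curseg_mutex -> SIT sentry_lock (write). Reserves the next free block of
 * the selected log, stores the summary entry, updates segment mtime and SIT
 * bitmaps for both the old and new address, and when the segment fills either
 * closes it (pinned log at a section boundary) or moves the log to another
 * segment via LFS or SSR. When @fio is given, the write is queued on the
 * per-temperature list so bios are submitted in allocation order.
 */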
3633 int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3634                 block_t old_blkaddr, block_t *new_blkaddr,
3635                 struct f2fs_summary *sum, int type,
3636                 struct f2fs_io_info *fio)
3637 {
3638         struct sit_info *sit_i = SIT_I(sbi);
3639         struct curseg_info *curseg = CURSEG_I(sbi, type);
3640         unsigned long long old_mtime;
3641         bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
3642         struct seg_entry *se = NULL;
3643         bool segment_full = false;
3644         int ret = 0;
3645 
3646         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3647 
3648         mutex_lock(&curseg->curseg_mutex);
3649         down_write(&sit_i->sentry_lock);
3650 
3651         if (curseg->segno == NULL_SEGNO) {
3652                 ret = -ENOSPC;
3653                 goto out_err;
3654         }
3655 
3656         if (from_gc) {
3657                 f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
3658                 se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
3659                 sanity_check_seg_type(sbi, se->type);
3660                 f2fs_bug_on(sbi, IS_NODESEG(se->type));
3661         }
3662         *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3663 
3664         f2fs_bug_on(sbi, curseg->next_blkoff >= BLKS_PER_SEG(sbi));
3665 
3666         f2fs_wait_discard_bio(sbi, *new_blkaddr);
3667 
3668         curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3669         if (curseg->alloc_type == SSR) {
3670                 curseg->next_blkoff = f2fs_find_next_ssr_block(sbi, curseg);
3671         } else {
3672                 curseg->next_blkoff++;
3673                 if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
3674                         f2fs_randomize_chunk(sbi, curseg);
3675         }
3676         if (curseg->next_blkoff >= f2fs_usable_blks_in_seg(sbi, curseg->segno))
3677                 segment_full = true;
3678         stat_inc_block_count(sbi, curseg);
3679 
3680         if (from_gc) {
3681                 old_mtime = get_segment_mtime(sbi, old_blkaddr);
3682         } else {
3683                 update_segment_mtime(sbi, old_blkaddr, 0);
3684                 old_mtime = 0;
3685         }
3686         update_segment_mtime(sbi, *new_blkaddr, old_mtime);
3687 
3688         /*
3689          * SIT information should be updated before segment allocation,
3690          * since SSR needs the latest valid block information.
3691          */
3692         update_sit_entry(sbi, *new_blkaddr, 1);
3693         update_sit_entry(sbi, old_blkaddr, -1);
3694 
3695         /*
3696          * If the current segment is full, flush it out and replace it with a
3697          * new segment.
3698          */
3699         if (segment_full) {
3700                 if (type == CURSEG_COLD_DATA_PINNED &&
3701                     !((curseg->segno + 1) % sbi->segs_per_sec)) {
3702                         write_sum_page(sbi, curseg->sum_blk,
3703                                         GET_SUM_BLOCK(sbi, curseg->segno));
3704                         reset_curseg_fields(curseg);
3705                         goto skip_new_segment;
3706                 }
3707 
3708                 if (from_gc) {
3709                         ret = get_atssr_segment(sbi, type, se->type,
3710                                                 AT_SSR, se->mtime);
3711                 } else {
3712                         if (need_new_seg(sbi, type))
3713                                 ret = new_curseg(sbi, type, false);
3714                         else
3715                                 ret = change_curseg(sbi, type);
3716                         stat_inc_seg_type(sbi, curseg);
3717                 }
3718 
3719                 if (ret)
3720                         goto out_err;
3721         }
3722 
3723 skip_new_segment:
3724         /*
3725          * Segment dirty status should be updated after segment allocation,
3726          * so we only need to update the status once, after the previous
3727          * segment has been closed.
3728          */
3729         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3730         locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3731 
3732         if (IS_DATASEG(curseg->seg_type))
3733                 atomic64_inc(&sbi->allocated_data_blocks);
3734 
3735         up_write(&sit_i->sentry_lock);
3736 
3737         if (page && IS_NODESEG(curseg->seg_type)) {
3738                 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3739 
3740                 f2fs_inode_chksum_set(sbi, page);
3741         }
3742 
3743         if (fio) {
3744                 struct f2fs_bio_info *io;
3745 
3746                 INIT_LIST_HEAD(&fio->list);
3747                 fio->in_list = 1;
3748                 io = sbi->write_io[fio->type] + fio->temp;
3749                 spin_lock(&io->io_lock);
3750                 list_add_tail(&fio->list, &io->io_list);
3751                 spin_unlock(&io->io_lock);
3752         }
3753 
3754         mutex_unlock(&curseg->curseg_mutex);
3755         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3756         return 0;
3757 
3758 out_err:
3759         *new_blkaddr = NULL_ADDR;
3760         up_write(&sit_i->sentry_lock);
3761         mutex_unlock(&curseg->curseg_mutex);
3762         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3763         return ret;
3764 }
3765 
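/*
 * On multi-device setups, mark every device touched by the blkcnt blocks at
 * @blkaddr as dirty, both per-inode for fsync (FLUSH_INO) and globally for
 * the next checkpoint.
 */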
3766 void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
3767                                         block_t blkaddr, unsigned int blkcnt)
3768 {
3769         if (!f2fs_is_multi_device(sbi))
3770                 return;
3771 
3772         while (1) {
3773                 unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
3774                 unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
3775 
3776                 /* update device state for fsync */
3777                 f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
3778 
3779                 /* update device state for checkpoint */
3780                 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3781                         spin_lock(&sbi->dev_lock);
3782                         f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3783                         spin_unlock(&sbi->dev_lock);
3784                 }
3785 
3786                 if (blkcnt <= blks)
3787                         break;
3788                 blkcnt -= blks;
3789                 blkaddr += blks;
3790         }
3791 }
3792 
3793 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3794 {
3795         int type = __get_segment_type(fio);
3796         bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
3797 
3798         if (keep_order)
3799                 f2fs_down_read(&fio->sbi->io_order_lock);
3800 
3801         if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3802                         &fio->new_blkaddr, sum, type, fio)) {
3803                 if (fscrypt_inode_uses_fs_layer_crypto(fio->page->mapping->host))
3804                         fscrypt_finalize_bounce_page(&fio->encrypted_page);
3805                 end_page_writeback(fio->page);
3806                 if (f2fs_in_warm_node_list(fio->sbi, fio->page))
3807                         f2fs_del_fsync_node_entry(fio->sbi, fio->page);
3808                 goto out;
3809         }
3810         if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3811                 f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr);
3812 
3813         /* write out the dirty page to the bdev */
3814         f2fs_submit_page_write(fio);
3815 
3816         f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
3817 out:
3818         if (keep_order)
3819                 f2fs_up_read(&fio->sbi->io_order_lock);
3820 }
3821 
3822 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3823                                         enum iostat_type io_type)
3824 {
3825         struct f2fs_io_info fio = {
3826                 .sbi = sbi,
3827                 .type = META,
3828                 .temp = HOT,
3829                 .op = REQ_OP_WRITE,
3830                 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3831                 .old_blkaddr = page->index,
3832                 .new_blkaddr = page->index,
3833                 .page = page,
3834                 .encrypted_page = NULL,
3835                 .in_list = 0,
3836         };
3837 
3838         if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3839                 fio.op_flags &= ~REQ_META;
3840 
3841         set_page_writeback(page);
3842         f2fs_submit_page_write(&fio);
3843 
3844         stat_inc_meta_count(sbi, page->index);
3845         f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
3846 }
3847 
3848 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3849 {
3850         struct f2fs_summary sum;
3851 
3852         set_summary(&sum, nid, 0, 0);
3853         do_write_page(&sum, fio);
3854 
3855         f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE);
3856 }
3857 
3858 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3859                                         struct f2fs_io_info *fio)
3860 {
3861         struct f2fs_sb_info *sbi = fio->sbi;
3862         struct f2fs_summary sum;
3863 
3864         f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3865         if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
3866                 f2fs_update_age_extent_cache(dn);
3867         set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3868         do_write_page(&sum, fio);
3869         f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3870 
3871         f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE);
3872 }
3873 
3874 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3875 {
3876         int err;
3877         struct f2fs_sb_info *sbi = fio->sbi;
3878         unsigned int segno;
3879 
3880         fio->new_blkaddr = fio->old_blkaddr;
3881         /* i/o temperature is needed for passing down write hints */
3882         __get_segment_type(fio);
3883 
3884         segno = GET_SEGNO(sbi, fio->new_blkaddr);
3885 
3886         if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3887                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3888                 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3889                           __func__, segno);
3890                 err = -EFSCORRUPTED;
3891                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
3892                 goto drop_bio;
3893         }
3894 
3895         if (f2fs_cp_error(sbi)) {
3896                 err = -EIO;
3897                 goto drop_bio;
3898         }
3899 
3900         if (fio->meta_gc)
3901                 f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
3902 
3903         stat_inc_inplace_blocks(fio->sbi);
3904 
3905         if (fio->bio && !IS_F2FS_IPU_NOCACHE(sbi))
3906                 err = f2fs_merge_page_bio(fio);
3907         else
3908                 err = f2fs_submit_page_bio(fio);
3909         if (!err) {
3910                 f2fs_update_device_state(fio->sbi, fio->ino,
3911                                                 fio->new_blkaddr, 1);
3912                 f2fs_update_iostat(fio->sbi, fio->page->mapping->host,
3913                                                 fio->io_type, F2FS_BLKSIZE);
3914         }
3915 
3916         return err;
3917 drop_bio:
3918         if (fio->bio && *(fio->bio)) {
3919                 struct bio *bio = *(fio->bio);
3920 
3921                 bio->bi_status = BLK_STS_IOERR;
3922                 bio_endio(bio);
3923                 *(fio->bio) = NULL;
3924         }
3925         return err;
3926 }
3927 
3928 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3929                                                 unsigned int segno)
3930 {
3931         int i;
3932 
3933         for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3934                 if (CURSEG_I(sbi, i)->segno == segno)
3935                         break;
3936         }
3937         return i;
3938 }
3939 
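/*
 * Install @sum for new_blkaddr outside the normal allocation path:
 * temporarily point the matching data log at new_blkaddr's segment, store
 * the summary at its exact block offset, mark new_blkaddr valid and
 * old_blkaddr invalid in the SIT, and, when @recover_curseg is set, restore
 * the log to its original position afterwards.
 */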
3940 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3941                                 block_t old_blkaddr, block_t new_blkaddr,
3942                                 bool recover_curseg, bool recover_newaddr,
3943                                 bool from_gc)
3944 {
3945         struct sit_info *sit_i = SIT_I(sbi);
3946         struct curseg_info *curseg;
3947         unsigned int segno, old_cursegno;
3948         struct seg_entry *se;
3949         int type;
3950         unsigned short old_blkoff;
3951         unsigned char old_alloc_type;
3952 
3953         segno = GET_SEGNO(sbi, new_blkaddr);
3954         se = get_seg_entry(sbi, segno);
3955         type = se->type;
3956 
3957         f2fs_down_write(&SM_I(sbi)->curseg_lock);
3958 
3959         if (!recover_curseg) {
3960                 /* for recovery flow */
3961                 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3962                         if (old_blkaddr == NULL_ADDR)
3963                                 type = CURSEG_COLD_DATA;
3964                         else
3965                                 type = CURSEG_WARM_DATA;
3966                 }
3967         } else {
3968                 if (IS_CURSEG(sbi, segno)) {
3969                         /* se->type is volatile due to SSR allocation */
3970                         type = __f2fs_get_curseg(sbi, segno);
3971                         f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3972                 } else {
3973                         type = CURSEG_WARM_DATA;
3974                 }
3975         }
3976 
3977         f2fs_bug_on(sbi, !IS_DATASEG(type));
3978         curseg = CURSEG_I(sbi, type);
3979 
3980         mutex_lock(&curseg->curseg_mutex);
3981         down_write(&sit_i->sentry_lock);
3982 
3983         old_cursegno = curseg->segno;
3984         old_blkoff = curseg->next_blkoff;
3985         old_alloc_type = curseg->alloc_type;
3986 
3987         /* change the current segment */
3988         if (segno != curseg->segno) {
3989                 curseg->next_segno = segno;
3990                 if (change_curseg(sbi, type))
3991                         goto out_unlock;
3992         }
3993 
3994         curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3995         curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3996 
3997         if (!recover_curseg || recover_newaddr) {
3998                 if (!from_gc)
3999                         update_segment_mtime(sbi, new_blkaddr, 0);
4000                 update_sit_entry(sbi, new_blkaddr, 1);
4001         }
4002         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
4003                 f2fs_invalidate_internal_cache(sbi, old_blkaddr);
4004                 if (!from_gc)
4005                         update_segment_mtime(sbi, old_blkaddr, 0);
4006                 update_sit_entry(sbi, old_blkaddr, -1);
4007         }
4008 
4009         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
4010         locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
4011 
4012         locate_dirty_segment(sbi, old_cursegno);
4013 
4014         if (recover_curseg) {
4015                 if (old_cursegno != curseg->segno) {
4016                         curseg->next_segno = old_cursegno;
4017                         if (change_curseg(sbi, type))
4018                                 goto out_unlock;
4019                 }
4020                 curseg->next_blkoff = old_blkoff;
4021                 curseg->alloc_type = old_alloc_type;
4022         }
4023 
4024 out_unlock:
4025         up_write(&sit_i->sentry_lock);
4026         mutex_unlock(&curseg->curseg_mutex);
4027         f2fs_up_write(&SM_I(sbi)->curseg_lock);
4028 }
4029 
4030 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
4031                                 block_t old_addr, block_t new_addr,
4032                                 unsigned char version, bool recover_curseg,
4033                                 bool recover_newaddr)
4034 {
4035         struct f2fs_summary sum;
4036 
4037         set_summary(&sum, dn->nid, dn->ofs_in_node, version);
4038 
4039         f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
4040                                         recover_curseg, recover_newaddr, false);
4041 
4042         f2fs_update_data_blkaddr(dn, new_addr);
4043 }
4044 
4045 void f2fs_wait_on_page_writeback(struct page *page,
4046                                 enum page_type type, bool ordered, bool locked)
4047 {
4048         if (folio_test_writeback(page_folio(page))) {
4049                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
4050 
4051                 /* submit cached LFS IO */
4052                 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
4053                 /* submit cached IPU IO */
4054                 f2fs_submit_merged_ipu_write(sbi, NULL, page);
4055                 if (ordered) {
4056                         wait_on_page_writeback(page);
4057                         f2fs_bug_on(sbi, locked &&
4058                                 folio_test_writeback(page_folio(page)));
4059                 } else {
4060                         wait_for_stable_page(page);
4061                 }
4062         }
4063 }
4064 
4065 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
4066 {
4067         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4068         struct page *cpage;
4069 
4070         if (!f2fs_meta_inode_gc_required(inode))
4071                 return;
4072 
4073         if (!__is_valid_data_blkaddr(blkaddr))
4074                 return;
4075 
4076         cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
4077         if (cpage) {
4078                 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
4079                 f2fs_put_page(cpage, 1);
4080         }
4081 }
4082 
4083 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
4084                                                                 block_t len)
4085 {
4086         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4087         block_t i;
4088 
4089         if (!f2fs_meta_inode_gc_required(inode))
4090                 return;
4091 
4092         for (i = 0; i < len; i++)
4093                 f2fs_wait_on_block_writeback(inode, blkaddr + i);
4094 
4095         f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
4096 }
4097 
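/*
 * Restore the compacted summary area written at checkpoint time: starting at
 * start_sum_block(), the first page holds the NAT journal followed by the
 * SIT journal, and the remaining space packs the summary entries of the
 * three data logs back to back across as many meta pages as needed (a log in
 * SSR mode restores a full segment's worth of entries).
 */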
4098 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
4099 {
4100         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4101         struct curseg_info *seg_i;
4102         unsigned char *kaddr;
4103         struct page *page;
4104         block_t start;
4105         int i, j, offset;
4106 
4107         start = start_sum_block(sbi);
4108 
4109         page = f2fs_get_meta_page(sbi, start++);
4110         if (IS_ERR(page))
4111                 return PTR_ERR(page);
4112         kaddr = (unsigned char *)page_address(page);
4113 
4114         /* Step 1: restore nat cache */
4115         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
4116         memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
4117 
4118         /* Step 2: restore sit cache */
4119         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
4120         memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
4121         offset = 2 * SUM_JOURNAL_SIZE;
4122 
4123         /* Step 3: restore summary entries */
4124         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
4125                 unsigned short blk_off;
4126                 unsigned int segno;
4127 
4128                 seg_i = CURSEG_I(sbi, i);
4129                 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
4130                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
4131                 seg_i->next_segno = segno;
4132                 reset_curseg(sbi, i, 0);
4133                 seg_i->alloc_type = ckpt->alloc_type[i];
4134                 seg_i->next_blkoff = blk_off;
4135 
4136                 if (seg_i->alloc_type == SSR)
4137                         blk_off = BLKS_PER_SEG(sbi);
4138 
4139                 for (j = 0; j < blk_off; j++) {
4140                         struct f2fs_summary *s;
4141 
4142                         s = (struct f2fs_summary *)(kaddr + offset);
4143                         seg_i->sum_blk->entries[j] = *s;
4144                         offset += SUMMARY_SIZE;
4145                         if (offset + SUMMARY_SIZE <= PAGE_SIZE -
4146                                                 SUM_FOOTER_SIZE)
4147                                 continue;
4148 
4149                         f2fs_put_page(page, 1);
4150                         page = NULL;
4151 
4152                         page = f2fs_get_meta_page(sbi, start++);
4153                         if (IS_ERR(page))
4154                                 return PTR_ERR(page);
4155                         kaddr = (unsigned char *)page_address(page);
4156                         offset = 0;
4157                 }
4158         }
4159         f2fs_put_page(page, 1);
4160         return 0;
4161 }
4162 
4163 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
4164 {
4165         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4166         struct f2fs_summary_block *sum;
4167         struct curseg_info *curseg;
4168         struct page *new;
4169         unsigned short blk_off;
4170         unsigned int segno = 0;
4171         block_t blk_addr = 0;
4172         int err = 0;
4173 
4174         /* get segment number and block addr */
4175         if (IS_DATASEG(type)) {
4176                 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
4177                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
4178                                                         CURSEG_HOT_DATA]);
4179                 if (__exist_node_summaries(sbi))
4180                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
4181                 else
4182                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
4183         } else {
4184                 segno = le32_to_cpu(ckpt->cur_node_segno[type -
4185                                                         CURSEG_HOT_NODE]);
4186                 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
4187                                                         CURSEG_HOT_NODE]);
4188                 if (__exist_node_summaries(sbi))
4189                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
4190                                                         type - CURSEG_HOT_NODE);
4191                 else
4192                         blk_addr = GET_SUM_BLOCK(sbi, segno);
4193         }
4194 
4195         new = f2fs_get_meta_page(sbi, blk_addr);
4196         if (IS_ERR(new))
4197                 return PTR_ERR(new);
4198         sum = (struct f2fs_summary_block *)page_address(new);
4199 
4200         if (IS_NODESEG(type)) {
4201                 if (__exist_node_summaries(sbi)) {
4202                         struct f2fs_summary *ns = &sum->entries[0];
4203                         int i;
4204 
4205                         for (i = 0; i < BLKS_PER_SEG(sbi); i++, ns++) {
4206                                 ns->version = 0;
4207                                 ns->ofs_in_node = 0;
4208                         }
4209                 } else {
4210                         err = f2fs_restore_node_summary(sbi, segno, sum);
4211                         if (err)
4212                                 goto out;
4213                 }
4214         }
4215 
4216         /* set uncompleted segment to curseg */
4217         curseg = CURSEG_I(sbi, type);
4218         mutex_lock(&curseg->curseg_mutex);
4219 
4220         /* update journal info */
4221         down_write(&curseg->journal_rwsem);
4222         memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
4223         up_write(&curseg->journal_rwsem);
4224 
4225         memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
4226         memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
4227         curseg->next_segno = segno;
4228         reset_curseg(sbi, type, 0);
4229         curseg->alloc_type = ckpt->alloc_type[type];
4230         curseg->next_blkoff = blk_off;
4231         mutex_unlock(&curseg->curseg_mutex);
4232 out:
4233         f2fs_put_page(new, 1);
4234         return err;
4235 }
4236 
4237 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
4238 {
4239         struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
4240         struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
4241         int type = CURSEG_HOT_DATA;
4242         int err;
4243 
4244         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
4245                 int npages = f2fs_npages_for_summary_flush(sbi, true);
4246 
4247                 if (npages >= 2)
4248                         f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
4249                                                         META_CP, true);
4250 
4251                 /* restore for compacted data summary */
4252                 err = read_compacted_summaries(sbi);
4253                 if (err)
4254                         return err;
4255                 type = CURSEG_HOT_NODE;
4256         }
4257 
4258         if (__exist_node_summaries(sbi))
4259                 f2fs_ra_meta_pages(sbi,
4260                                 sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
4261                                 NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
4262 
4263         for (; type <= CURSEG_COLD_NODE; type++) {
4264                 err = read_normal_summaries(sbi, type);
4265                 if (err)
4266                         return err;
4267         }
4268 
4269         /* sanity check for summary blocks */
4270         if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
4271                         sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
4272                 f2fs_err(sbi, "invalid journal entries nats %u sits %u",
4273                          nats_in_cursum(nat_j), sits_in_cursum(sit_j));
4274                 return -EINVAL;
4275         }
4276 
4277         return 0;
4278 }
4279 
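/*
 * Write summaries in the compacted format: the NAT journal, the SIT journal
 * and the summary entries of the three data logs are packed back to back
 * into as few meta pages as possible, keeping the last SUM_FOOTER_SIZE
 * bytes of each page unused for entries.
 */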
4280 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
4281 {
4282         struct page *page;
4283         unsigned char *kaddr;
4284         struct f2fs_summary *summary;
4285         struct curseg_info *seg_i;
4286         int written_size = 0;
4287         int i, j;
4288 
4289         page = f2fs_grab_meta_page(sbi, blkaddr++);
4290         kaddr = (unsigned char *)page_address(page);
4291         memset(kaddr, 0, PAGE_SIZE);
4292 
4293         /* Step 1: write nat cache */
4294         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
4295         memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
4296         written_size += SUM_JOURNAL_SIZE;
4297 
4298         /* Step 2: write sit cache */
4299         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
4300         memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
4301         written_size += SUM_JOURNAL_SIZE;
4302 
4303         /* Step 3: write summary entries */
4304         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
4305                 seg_i = CURSEG_I(sbi, i);
4306                 for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) {
4307                         if (!page) {
4308                                 page = f2fs_grab_meta_page(sbi, blkaddr++);
4309                                 kaddr = (unsigned char *)page_address(page);
4310                                 memset(kaddr, 0, PAGE_SIZE);
4311                                 written_size = 0;
4312                         }
4313                         summary = (struct f2fs_summary *)(kaddr + written_size);
4314                         *summary = seg_i->sum_blk->entries[j];
4315                         written_size += SUMMARY_SIZE;
4316 
4317                         if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
4318                                                         SUM_FOOTER_SIZE)
4319                                 continue;
4320 
4321                         set_page_dirty(page);
4322                         f2fs_put_page(page, 1);
4323                         page = NULL;
4324                 }
4325         }
4326         if (page) {
4327                 set_page_dirty(page);
4328                 f2fs_put_page(page, 1);
4329         }
4330 }
4331 
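/*
 * Write one full summary block per current segment of the given group
 * (data or node logs) to consecutive block addresses starting at blkaddr.
 */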
4332 static void write_normal_summaries(struct f2fs_sb_info *sbi,
4333                                         block_t blkaddr, int type)
4334 {
4335         int i, end;
4336 
4337         if (IS_DATASEG(type))
4338                 end = type + NR_CURSEG_DATA_TYPE;
4339         else
4340                 end = type + NR_CURSEG_NODE_TYPE;
4341 
4342         for (i = type; i < end; i++)
4343                 write_current_sum_page(sbi, i, blkaddr + (i - type));
4344 }
4345 
4346 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4347 {
4348         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
4349                 write_compacted_summaries(sbi, start_blk);
4350         else
4351                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
4352 }
4353 
4354 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4355 {
4356         write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
4357 }
4358 
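/*
 * Look up an entry for @val (a nid for NAT_JOURNAL, a segno for SIT_JOURNAL)
 * in the in-memory journal.  Return its index if found; otherwise, if @alloc
 * is set and the journal still has room, allocate a new slot and return its
 * index, else return -1.
 */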
4359 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
4360                                         unsigned int val, int alloc)
4361 {
4362         int i;
4363 
4364         if (type == NAT_JOURNAL) {
4365                 for (i = 0; i < nats_in_cursum(journal); i++) {
4366                         if (le32_to_cpu(nid_in_journal(journal, i)) == val)
4367                                 return i;
4368                 }
4369                 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
4370                         return update_nats_in_cursum(journal, 1);
4371         } else if (type == SIT_JOURNAL) {
4372                 for (i = 0; i < sits_in_cursum(journal); i++)
4373                         if (le32_to_cpu(segno_in_journal(journal, i)) == val)
4374                                 return i;
4375                 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
4376                         return update_sits_in_cursum(journal, 1);
4377         }
4378         return -1;
4379 }
4380 
4381 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
4382                                         unsigned int segno)
4383 {
4384         return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
4385 }
4386 
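/*
 * SIT blocks are kept in two on-disk copies.  Grab the meta page of the
 * alternate copy for the SIT block covering @start, fill it from the
 * in-memory segment entries, mark it dirty and flip the SIT bitmap so the
 * alternate copy becomes the current one.
 */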
4387 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
4388                                         unsigned int start)
4389 {
4390         struct sit_info *sit_i = SIT_I(sbi);
4391         struct page *page;
4392         pgoff_t src_off, dst_off;
4393 
4394         src_off = current_sit_addr(sbi, start);
4395         dst_off = next_sit_addr(sbi, src_off);
4396 
4397         page = f2fs_grab_meta_page(sbi, dst_off);
4398         seg_info_to_sit_page(sbi, page, start);
4399 
4400         set_page_dirty(page);
4401         set_to_next_sit(sit_i, start);
4402 
4403         return page;
4404 }
4405 
4406 static struct sit_entry_set *grab_sit_entry_set(void)
4407 {
4408         struct sit_entry_set *ses =
4409                         f2fs_kmem_cache_alloc(sit_entry_set_slab,
4410                                                 GFP_NOFS, true, NULL);
4411 
4412         ses->entry_cnt = 0;
4413         INIT_LIST_HEAD(&ses->set_list);
4414         return ses;
4415 }
4416 
4417 static void release_sit_entry_set(struct sit_entry_set *ses)
4418 {
4419         list_del(&ses->set_list);
4420         kmem_cache_free(sit_entry_set_slab, ses);
4421 }
4422 
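/*
 * Dirty SIT entries are grouped into per-SIT-block sets.  The set list is
 * kept ordered by entry count so that, at flush time, the sets with fewer
 * dirty entries come first and can be written into the journal while it
 * still has space.
 */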
4423 static void adjust_sit_entry_set(struct sit_entry_set *ses,
4424                                                 struct list_head *head)
4425 {
4426         struct sit_entry_set *next = ses;
4427 
4428         if (list_is_last(&ses->set_list, head))
4429                 return;
4430 
4431         list_for_each_entry_continue(next, head, set_list)
4432                 if (ses->entry_cnt <= next->entry_cnt) {
4433                         list_move_tail(&ses->set_list, &next->set_list);
4434                         return;
4435                 }
4436 
4437         list_move_tail(&ses->set_list, head);
4438 }
4439 
4440 static void add_sit_entry(unsigned int segno, struct list_head *head)
4441 {
4442         struct sit_entry_set *ses;
4443         unsigned int start_segno = START_SEGNO(segno);
4444 
4445         list_for_each_entry(ses, head, set_list) {
4446                 if (ses->start_segno == start_segno) {
4447                         ses->entry_cnt++;
4448                         adjust_sit_entry_set(ses, head);
4449                         return;
4450                 }
4451         }
4452 
4453         ses = grab_sit_entry_set();
4454 
4455         ses->start_segno = start_segno;
4456         ses->entry_cnt++;
4457         list_add(&ses->set_list, head);
4458 }
4459 
4460 static void add_sits_in_set(struct f2fs_sb_info *sbi)
4461 {
4462         struct f2fs_sm_info *sm_info = SM_I(sbi);
4463         struct list_head *set_list = &sm_info->sit_entry_set;
4464         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
4465         unsigned int segno;
4466 
4467         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
4468                 add_sit_entry(segno, set_list);
4469 }
4470 
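/*
 * Move every SIT entry currently cached in the cold data journal back into
 * the dirty sentries bitmap and SIT entry sets, then empty the journal.
 */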
4471 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
4472 {
4473         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4474         struct f2fs_journal *journal = curseg->journal;
4475         int i;
4476 
4477         down_write(&curseg->journal_rwsem);
4478         for (i = 0; i < sits_in_cursum(journal); i++) {
4479                 unsigned int segno;
4480                 bool dirtied;
4481 
4482                 segno = le32_to_cpu(segno_in_journal(journal, i));
4483                 dirtied = __mark_sit_entry_dirty(sbi, segno);
4484 
4485                 if (!dirtied)
4486                         add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
4487         }
4488         update_sits_in_cursum(journal, -i);
4489         up_write(&curseg->journal_rwsem);
4490 }
4491 
4492 /*
4493  * CP calls this function, which flushes SIT entries including sit_journal,
4494  * and moves prefree segs to free segs.
4495  */
4496 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
4497 {
4498         struct sit_info *sit_i = SIT_I(sbi);
4499         unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
4500         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4501         struct f2fs_journal *journal = curseg->journal;
4502         struct sit_entry_set *ses, *tmp;
4503         struct list_head *head = &SM_I(sbi)->sit_entry_set;
4504         bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
4505         struct seg_entry *se;
4506 
4507         down_write(&sit_i->sentry_lock);
4508 
4509         if (!sit_i->dirty_sentries)
4510                 goto out;
4511 
4512         /*
4513          * temporarily add the SIT entries marked in the dirty bitmap
4514          * to the SIT entry sets and account for them
4515          */
4516         add_sits_in_set(sbi);
4517 
4518         /*
4519          * if there is not enough space in the journal to store the dirty
4520          * sit entries, remove all entries from the journal and add and
4521          * account for them in the sit entry sets.
4522          */
4523         if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
4524                                                                 !to_journal)
4525                 remove_sits_in_journal(sbi);
4526 
4527         /*
4528          * there are two steps to flush sit entries:
4529          * #1, flush sit entries to journal in current cold data summary block.
4530          * #2, flush sit entries to sit page.
4531          */
4532         list_for_each_entry_safe(ses, tmp, head, set_list) {
4533                 struct page *page = NULL;
4534                 struct f2fs_sit_block *raw_sit = NULL;
4535                 unsigned int start_segno = ses->start_segno;
4536                 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
4537                                                 (unsigned long)MAIN_SEGS(sbi));
4538                 unsigned int segno = start_segno;
4539 
4540                 if (to_journal &&
4541                         !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
4542                         to_journal = false;
4543 
4544                 if (to_journal) {
4545                         down_write(&curseg->journal_rwsem);
4546                 } else {
4547                         page = get_next_sit_page(sbi, start_segno);
4548                         raw_sit = page_address(page);
4549                 }
4550 
4551                 /* flush dirty sit entries in region of current sit set */
4552                 for_each_set_bit_from(segno, bitmap, end) {
4553                         int offset, sit_offset;
4554 
4555                         se = get_seg_entry(sbi, segno);
4556 #ifdef CONFIG_F2FS_CHECK_FS
4557                         if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
4558                                                 SIT_VBLOCK_MAP_SIZE))
4559                                 f2fs_bug_on(sbi, 1);
4560 #endif
4561 
4562                         /* add discard candidates */
4563                         if (!(cpc->reason & CP_DISCARD)) {
4564                                 cpc->trim_start = segno;
4565                                 add_discard_addrs(sbi, cpc, false);
4566                         }
4567 
4568                         if (to_journal) {
4569                                 offset = f2fs_lookup_journal_in_cursum(journal,
4570                                                         SIT_JOURNAL, segno, 1);
4571                                 f2fs_bug_on(sbi, offset < 0);
4572                                 segno_in_journal(journal, offset) =
4573                                                         cpu_to_le32(segno);
4574                                 seg_info_to_raw_sit(se,
4575                                         &sit_in_journal(journal, offset));
4576                                 check_block_count(sbi, segno,
4577                                         &sit_in_journal(journal, offset));
4578                         } else {
4579                                 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
4580                                 seg_info_to_raw_sit(se,
4581                                                 &raw_sit->entries[sit_offset]);
4582                                 check_block_count(sbi, segno,
4583                                                 &raw_sit->entries[sit_offset]);
4584                         }
4585 
4586                         __clear_bit(segno, bitmap);
4587                         sit_i->dirty_sentries--;
4588                         ses->entry_cnt--;
4589                 }
4590 
4591                 if (to_journal)
4592                         up_write(&curseg->journal_rwsem);
4593                 else
4594                         f2fs_put_page(page, 1);
4595 
4596                 f2fs_bug_on(sbi, ses->entry_cnt);
4597                 release_sit_entry_set(ses);
4598         }
4599 
4600         f2fs_bug_on(sbi, !list_empty(head));
4601         f2fs_bug_on(sbi, sit_i->dirty_sentries);
4602 out:
4603         if (cpc->reason & CP_DISCARD) {
4604                 __u64 trim_start = cpc->trim_start;
4605 
4606                 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
4607                         add_discard_addrs(sbi, cpc, false);
4608 
4609                 cpc->trim_start = trim_start;
4610         }
4611         up_write(&sit_i->sentry_lock);
4612 
4613         set_prefree_as_free_segments(sbi);
4614 }
4615 
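/*
 * Allocate and initialize the in-memory SIT info at mount time: the
 * per-segment seg_entry array, the dirty sentries bitmap, the per-segment
 * validity/discard bitmaps (carved out of one large allocation), the
 * optional per-section entries for large sections, and a copy of the SIT
 * bitmap taken from the checkpoint.
 */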
4616 static int build_sit_info(struct f2fs_sb_info *sbi)
4617 {
4618         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4619         struct sit_info *sit_i;
4620         unsigned int sit_segs, start;
4621         char *src_bitmap, *bitmap;
4622         unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
4623         unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
4624 
4625         /* allocate memory for SIT information */
4626         sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
4627         if (!sit_i)
4628                 return -ENOMEM;
4629 
4630         SM_I(sbi)->sit_info = sit_i;
4631 
4632         sit_i->sentries =
4633                 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
4634                                               MAIN_SEGS(sbi)),
4635                               GFP_KERNEL);
4636         if (!sit_i->sentries)
4637                 return -ENOMEM;
4638 
4639         main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4640         sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
4641                                                                 GFP_KERNEL);
4642         if (!sit_i->dirty_sentries_bitmap)
4643                 return -ENOMEM;
4644 
4645 #ifdef CONFIG_F2FS_CHECK_FS
4646         bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
4647 #else
4648         bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
4649 #endif
4650         sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4651         if (!sit_i->bitmap)
4652                 return -ENOMEM;
4653 
4654         bitmap = sit_i->bitmap;
4655 
4656         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4657                 sit_i->sentries[start].cur_valid_map = bitmap;
4658                 bitmap += SIT_VBLOCK_MAP_SIZE;
4659 
4660                 sit_i->sentries[start].ckpt_valid_map = bitmap;
4661                 bitmap += SIT_VBLOCK_MAP_SIZE;
4662 
4663 #ifdef CONFIG_F2FS_CHECK_FS
4664                 sit_i->sentries[start].cur_valid_map_mir = bitmap;
4665                 bitmap += SIT_VBLOCK_MAP_SIZE;
4666 #endif
4667 
4668                 if (discard_map) {
4669                         sit_i->sentries[start].discard_map = bitmap;
4670                         bitmap += SIT_VBLOCK_MAP_SIZE;
4671                 }
4672         }
4673 
4674         sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4675         if (!sit_i->tmp_map)
4676                 return -ENOMEM;
4677 
4678         if (__is_large_section(sbi)) {
4679                 sit_i->sec_entries =
4680                         f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4681                                                       MAIN_SECS(sbi)),
4682                                       GFP_KERNEL);
4683                 if (!sit_i->sec_entries)
4684                         return -ENOMEM;
4685         }
4686 
4687         /* get information related to SIT */
4688         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4689 
4690         /* set up SIT bitmap from checkpoint pack */
4691         sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4692         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4693 
4694         sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4695         if (!sit_i->sit_bitmap)
4696                 return -ENOMEM;
4697 
4698 #ifdef CONFIG_F2FS_CHECK_FS
4699         sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4700                                         sit_bitmap_size, GFP_KERNEL);
4701         if (!sit_i->sit_bitmap_mir)
4702                 return -ENOMEM;
4703 
4704         sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4705                                         main_bitmap_size, GFP_KERNEL);
4706         if (!sit_i->invalid_segmap)
4707                 return -ENOMEM;
4708 #endif
4709 
4710         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4711         sit_i->sit_blocks = SEGS_TO_BLKS(sbi, sit_segs);
4712         sit_i->written_valid_blocks = 0;
4713         sit_i->bitmap_size = sit_bitmap_size;
4714         sit_i->dirty_sentries = 0;
4715         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4716         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4717         sit_i->mounted_time = ktime_get_boottime_seconds();
4718         init_rwsem(&sit_i->sentry_lock);
4719         return 0;
4720 }
4721 
4722 static int build_free_segmap(struct f2fs_sb_info *sbi)
4723 {
4724         struct free_segmap_info *free_i;
4725         unsigned int bitmap_size, sec_bitmap_size;
4726 
4727         /* allocate memory for free segmap information */
4728         free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4729         if (!free_i)
4730                 return -ENOMEM;
4731 
4732         SM_I(sbi)->free_info = free_i;
4733 
4734         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4735         free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4736         if (!free_i->free_segmap)
4737                 return -ENOMEM;
4738 
4739         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4740         free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4741         if (!free_i->free_secmap)
4742                 return -ENOMEM;
4743 
4744         /* set all segments as dirty temporarily */
4745         memset(free_i->free_segmap, 0xff, bitmap_size);
4746         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4747 
4748         /* init free segmap information */
4749         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4750         free_i->free_segments = 0;
4751         free_i->free_sections = 0;
4752         spin_lock_init(&free_i->segmap_lock);
4753         return 0;
4754 }
4755 
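/*
 * Allocate the curseg_info array, one entry per log type, together with its
 * summary block and in-memory journal, assign the backing segment type for
 * the special logs (pinned and ATGC), and finally restore the summaries
 * recorded in the checkpoint.
 */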
4756 static int build_curseg(struct f2fs_sb_info *sbi)
4757 {
4758         struct curseg_info *array;
4759         int i;
4760 
4761         array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
4762                                         sizeof(*array)), GFP_KERNEL);
4763         if (!array)
4764                 return -ENOMEM;
4765 
4766         SM_I(sbi)->curseg_array = array;
4767 
4768         for (i = 0; i < NO_CHECK_TYPE; i++) {
4769                 mutex_init(&array[i].curseg_mutex);
4770                 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4771                 if (!array[i].sum_blk)
4772                         return -ENOMEM;
4773                 init_rwsem(&array[i].journal_rwsem);
4774                 array[i].journal = f2fs_kzalloc(sbi,
4775                                 sizeof(struct f2fs_journal), GFP_KERNEL);
4776                 if (!array[i].journal)
4777                         return -ENOMEM;
4778                 if (i < NR_PERSISTENT_LOG)
4779                         array[i].seg_type = CURSEG_HOT_DATA + i;
4780                 else if (i == CURSEG_COLD_DATA_PINNED)
4781                         array[i].seg_type = CURSEG_COLD_DATA;
4782                 else if (i == CURSEG_ALL_DATA_ATGC)
4783                         array[i].seg_type = CURSEG_COLD_DATA;
4784                 reset_curseg_fields(&array[i]);
4785         }
4786         return restore_curseg_summaries(sbi);
4787 }
4788 
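/*
 * Read all on-disk SIT blocks (with readahead), initialize the in-memory
 * segment entries and discard maps from them, then overlay the newer
 * entries cached in the SIT journal, and finally cross-check the summed
 * valid block counts against the node and user block counters from the
 * checkpoint.
 */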
4789 static int build_sit_entries(struct f2fs_sb_info *sbi)
4790 {
4791         struct sit_info *sit_i = SIT_I(sbi);
4792         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4793         struct f2fs_journal *journal = curseg->journal;
4794         struct seg_entry *se;
4795         struct f2fs_sit_entry sit;
4796         int sit_blk_cnt = SIT_BLK_CNT(sbi);
4797         unsigned int i, start, end;
4798         unsigned int readed, start_blk = 0;
4799         int err = 0;
4800         block_t sit_valid_blocks[2] = {0, 0};
4801 
4802         do {
4803                 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
4804                                                         META_SIT, true);
4805 
4806                 start = start_blk * sit_i->sents_per_block;
4807                 end = (start_blk + readed) * sit_i->sents_per_block;
4808 
4809                 for (; start < end && start < MAIN_SEGS(sbi); start++) {
4810                         struct f2fs_sit_block *sit_blk;
4811                         struct page *page;
4812 
4813                         se = &sit_i->sentries[start];
4814                         page = get_current_sit_page(sbi, start);
4815                         if (IS_ERR(page))
4816                                 return PTR_ERR(page);
4817                         sit_blk = (struct f2fs_sit_block *)page_address(page);
4818                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4819                         f2fs_put_page(page, 1);
4820 
4821                         err = check_block_count(sbi, start, &sit);
4822                         if (err)
4823                                 return err;
4824                         seg_info_from_raw_sit(se, &sit);
4825 
4826                         if (se->type >= NR_PERSISTENT_LOG) {
4827                                 f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4828                                                         se->type, start);
4829                                 f2fs_handle_error(sbi,
4830                                                 ERROR_INCONSISTENT_SUM_TYPE);
4831                                 return -EFSCORRUPTED;
4832                         }
4833 
4834                         sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4835 
4836                         if (!f2fs_block_unit_discard(sbi))
4837                                 goto init_discard_map_done;
4838 
4839                         /* build discard map only one time */
4840                         if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4841                                 memset(se->discard_map, 0xff,
4842                                                 SIT_VBLOCK_MAP_SIZE);
4843                                 goto init_discard_map_done;
4844                         }
4845                         memcpy(se->discard_map, se->cur_valid_map,
4846                                                 SIT_VBLOCK_MAP_SIZE);
4847                         sbi->discard_blks += BLKS_PER_SEG(sbi) -
4848                                                 se->valid_blocks;
4849 init_discard_map_done:
4850                         if (__is_large_section(sbi))
4851                                 get_sec_entry(sbi, start)->valid_blocks +=
4852                                                         se->valid_blocks;
4853                 }
4854                 start_blk += readed;
4855         } while (start_blk < sit_blk_cnt);
4856 
4857         down_read(&curseg->journal_rwsem);
4858         for (i = 0; i < sits_in_cursum(journal); i++) {
4859                 unsigned int old_valid_blocks;
4860 
4861                 start = le32_to_cpu(segno_in_journal(journal, i));
4862                 if (start >= MAIN_SEGS(sbi)) {
4863                         f2fs_err(sbi, "Wrong journal entry on segno %u",
4864                                  start);
4865                         err = -EFSCORRUPTED;
4866                         f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL);
4867                         break;
4868                 }
4869 
4870                 se = &sit_i->sentries[start];
4871                 sit = sit_in_journal(journal, i);
4872 
4873                 old_valid_blocks = se->valid_blocks;
4874 
4875                 sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
4876 
4877                 err = check_block_count(sbi, start, &sit);
4878                 if (err)
4879                         break;
4880                 seg_info_from_raw_sit(se, &sit);
4881 
4882                 if (se->type >= NR_PERSISTENT_LOG) {
4883                         f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4884                                                         se->type, start);
4885                         err = -EFSCORRUPTED;
4886                         f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
4887                         break;
4888                 }
4889 
4890                 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4891 
4892                 if (f2fs_block_unit_discard(sbi)) {
4893                         if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4894                                 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4895                         } else {
4896                                 memcpy(se->discard_map, se->cur_valid_map,
4897                                                         SIT_VBLOCK_MAP_SIZE);
4898                                 sbi->discard_blks += old_valid_blocks;
4899                                 sbi->discard_blks -= se->valid_blocks;
4900                         }
4901                 }
4902 
4903                 if (__is_large_section(sbi)) {
4904                         get_sec_entry(sbi, start)->valid_blocks +=
4905                                                         se->valid_blocks;
4906                         get_sec_entry(sbi, start)->valid_blocks -=
4907                                                         old_valid_blocks;
4908                 }
4909         }
4910         up_read(&curseg->journal_rwsem);
4911 
4912         if (err)
4913                 return err;
4914 
4915         if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
4916                 f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
4917                          sit_valid_blocks[NODE], valid_node_count(sbi));
4918                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT);
4919                 return -EFSCORRUPTED;
4920         }
4921 
4922         if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
4923                                 valid_user_blocks(sbi)) {
4924                 f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
4925                          sit_valid_blocks[DATA], sit_valid_blocks[NODE],
4926                          valid_user_blocks(sbi));
4927                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT);
4928                 return -EFSCORRUPTED;
4929         }
4930 
4931         return 0;
4932 }
4933 
4934 static void init_free_segmap(struct f2fs_sb_info *sbi)
4935 {
4936         unsigned int start;
4937         int type;
4938         struct seg_entry *sentry;
4939 
4940         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4941                 if (f2fs_usable_blks_in_seg(sbi, start) == 0)
4942                         continue;
4943                 sentry = get_seg_entry(sbi, start);
4944                 if (!sentry->valid_blocks)
4945                         __set_free(sbi, start);
4946                 else
4947                         SIT_I(sbi)->written_valid_blocks +=
4948                                                 sentry->valid_blocks;
4949         }
4950 
4951         /* mark the current segments as in use */
4952         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4953                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4954 
4955                 __set_test_and_inuse(sbi, curseg_t->segno);
4956         }
4957 }
4958 
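/*
 * Walk the in-use segments and mark the partially valid ones dirty; with
 * large sections, additionally mark partially valid sections that are not
 * currently open in dirty_secmap.
 */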
4959 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4960 {
4961         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4962         struct free_segmap_info *free_i = FREE_I(sbi);
4963         unsigned int segno = 0, offset = 0, secno;
4964         block_t valid_blocks, usable_blks_in_seg;
4965 
4966         while (1) {
4967                 /* find dirty segment based on free segmap */
4968                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4969                 if (segno >= MAIN_SEGS(sbi))
4970                         break;
4971                 offset = segno + 1;
4972                 valid_blocks = get_valid_blocks(sbi, segno, false);
4973                 usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
4974                 if (valid_blocks == usable_blks_in_seg || !valid_blocks)
4975                         continue;
4976                 if (valid_blocks > usable_blks_in_seg) {
4977                         f2fs_bug_on(sbi, 1);
4978                         continue;
4979                 }
4980                 mutex_lock(&dirty_i->seglist_lock);
4981                 __locate_dirty_segment(sbi, segno, DIRTY);
4982                 mutex_unlock(&dirty_i->seglist_lock);
4983         }
4984 
4985         if (!__is_large_section(sbi))
4986                 return;
4987 
4988         mutex_lock(&dirty_i->seglist_lock);
4989         for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
4990                 valid_blocks = get_valid_blocks(sbi, segno, true);
4991                 secno = GET_SEC_FROM_SEG(sbi, segno);
4992 
4993                 if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
4994                         continue;
4995                 if (IS_CURSEC(sbi, secno))
4996                         continue;
4997                 set_bit(secno, dirty_i->dirty_secmap);
4998         }
4999         mutex_unlock(&dirty_i->seglist_lock);
5000 }
5001 
5002 static int init_victim_secmap(struct f2fs_sb_info *sbi)
5003 {
5004         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5005         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
5006 
5007         dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
5008         if (!dirty_i->victim_secmap)
5009                 return -ENOMEM;
5010 
5011         dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
5012         if (!dirty_i->pinned_secmap)
5013                 return -ENOMEM;
5014 
5015         dirty_i->pinned_secmap_cnt = 0;
5016         dirty_i->enable_pin_section = true;
5017         return 0;
5018 }
5019 
5020 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
5021 {
5022         struct dirty_seglist_info *dirty_i;
5023         unsigned int bitmap_size, i;
5024 
5025         /* allocate memory for dirty segments list information */
5026         dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
5027                                                                 GFP_KERNEL);
5028         if (!dirty_i)
5029                 return -ENOMEM;
5030 
5031         SM_I(sbi)->dirty_info = dirty_i;
5032         mutex_init(&dirty_i->seglist_lock);
5033 
5034         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
5035 
5036         for (i = 0; i < NR_DIRTY_TYPE; i++) {
5037                 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
5038                                                                 GFP_KERNEL);
5039                 if (!dirty_i->dirty_segmap[i])
5040                         return -ENOMEM;
5041         }
5042 
5043         if (__is_large_section(sbi)) {
5044                 bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
5045                 dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
5046                                                 bitmap_size, GFP_KERNEL);
5047                 if (!dirty_i->dirty_secmap)
5048                         return -ENOMEM;
5049         }
5050 
5051         init_dirty_segmap(sbi);
5052         return init_victim_secmap(sbi);
5053 }
5054 
5055 static int sanity_check_curseg(struct f2fs_sb_info *sbi)
5056 {
5057         int i;
5058 
5059         /*
5060          * In an LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
5061          * in an LFS curseg, all blkaddrs after .next_blkoff should be unused.
5062          */
5063         for (i = 0; i < NR_PERSISTENT_LOG; i++) {
5064                 struct curseg_info *curseg = CURSEG_I(sbi, i);
5065                 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
5066                 unsigned int blkofs = curseg->next_blkoff;
5067 
5068                 if (f2fs_sb_has_readonly(sbi) &&
5069                         i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
5070                         continue;
5071 
5072                 sanity_check_seg_type(sbi, curseg->seg_type);
5073 
5074                 if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
5075                         f2fs_err(sbi,
5076                                  "Current segment has invalid alloc_type:%d",
5077                                  curseg->alloc_type);
5078                         f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
5079                         return -EFSCORRUPTED;
5080                 }
5081 
5082                 if (f2fs_test_bit(blkofs, se->cur_valid_map))
5083                         goto out;
5084 
5085                 if (curseg->alloc_type == SSR)
5086                         continue;
5087 
5088                 for (blkofs += 1; blkofs < BLKS_PER_SEG(sbi); blkofs++) {
5089                         if (!f2fs_test_bit(blkofs, se->cur_valid_map))
5090                                 continue;
5091 out:
5092                         f2fs_err(sbi,
5093                                  "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
5094                                  i, curseg->segno, curseg->alloc_type,
5095                                  curseg->next_blkoff, blkofs);
5096                         f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
5097                         return -EFSCORRUPTED;
5098                 }
5099         }
5100         return 0;
5101 }
5102 
5103 #ifdef CONFIG_BLK_DEV_ZONED
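/*
 * For a sequential-write-required zone, verify that its condition matches
 * the number of valid blocks f2fs tracks for it: reset zones that hold no
 * valid blocks but are not empty, and finish (or, if unsupported, zero-fill)
 * zones whose valid blocks disagree with the write pointer.
 */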
5104 static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
5105                                     struct f2fs_dev_info *fdev,
5106                                     struct blk_zone *zone)
5107 {
5108         unsigned int zone_segno;
5109         block_t zone_block, valid_block_cnt;
5110         unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
5111         int ret;
5112         unsigned int nofs_flags;
5113 
5114         if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5115                 return 0;
5116 
5117         zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
5118         zone_segno = GET_SEGNO(sbi, zone_block);
5119 
5120         /*
5121          * Skip checking the zones that cursegs point to, since
5122          * fix_curseg_write_pointer() checks them.
5123          */
5124         if (zone_segno >= MAIN_SEGS(sbi))
5125                 return 0;
5126 
5127         /*
5128          * Get the number of valid blocks in the zone.
5129          */
5130         valid_block_cnt = get_valid_blocks(sbi, zone_segno, true);
5131         if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) {
5132                 f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]",
5133                                 zone_segno, valid_block_cnt,
5134                                 blk_zone_cond_str(zone->cond));
5135                 return 0;
5136         }
5137 
5138         if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) ||
5139             (valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL))
5140                 return 0;
5141 
5142         if (!valid_block_cnt) {
5143                 f2fs_notice(sbi, "Zone without valid block has non-zero write "
5144                             "pointer. Reset the write pointer: cond[%s]",
5145                             blk_zone_cond_str(zone->cond));
5146                 ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
5147                                         zone->len >> log_sectors_per_block);
5148                 if (ret)
5149                         f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5150                                  fdev->path, ret);
5151                 return ret;
5152         }
5153 
5154         /*
5155          * If there are valid blocks and the write pointer does not
5156          * match them, report the inconsistency and fill the zone up to
5157          * its end to close it. This inconsistency does not cause a
5158          * write error because the zone will not be selected for a write
5159          * operation until it gets discarded.
5160          */
5161         f2fs_notice(sbi, "Valid blocks are not aligned with write "
5162                     "pointer: valid block[0x%x,0x%x] cond[%s]",
5163                     zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond));
5164 
5165         nofs_flags = memalloc_nofs_save();
5166         ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
5167                                 zone->start, zone->len);
5168         memalloc_nofs_restore(nofs_flags);
5169         if (ret == -EOPNOTSUPP) {
5170                 ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
5171                                         zone->len - (zone->wp - zone->start),
5172                                         GFP_NOFS, 0);
5173                 if (ret)
5174                         f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)",
5175                                         fdev->path, ret);
5176         } else if (ret) {
5177                 f2fs_err(sbi, "Finishing zone failed: %s (errno=%d)",
5178                                 fdev->path, ret);
5179         }
5180 
5181         return ret;
5182 }
5183 
5184 static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
5185                                                   block_t zone_blkaddr)
5186 {
5187         int i;
5188 
5189         for (i = 0; i < sbi->s_ndevs; i++) {
5190                 if (!bdev_is_zoned(FDEV(i).bdev))
5191                         continue;
5192                 if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
5193                                 zone_blkaddr <= FDEV(i).end_blk))
5194                         return &FDEV(i);
5195         }
5196 
5197         return NULL;
5198 }
5199 
5200 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
5201                               void *data)
5202 {
5203         memcpy(data, zone, sizeof(struct blk_zone));
5204         return 0;
5205 }
5206 
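/*
 * Make the curseg of the given type consistent with the write pointer of
 * the zone it sits in: after a clean unmount the curseg is kept if it is
 * aligned with the write pointer; otherwise a new section is allocated,
 * and the newly assigned zone is reset if its write pointer is not at the
 * zone start.
 */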
5207 static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
5208 {
5209         struct curseg_info *cs = CURSEG_I(sbi, type);
5210         struct f2fs_dev_info *zbd;
5211         struct blk_zone zone;
5212         unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
5213         block_t cs_zone_block, wp_block;
5214         unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
5215         sector_t zone_sector;
5216         int err;
5217 
5218         cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
5219         cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
5220 
5221         zbd = get_target_zoned_dev(sbi, cs_zone_block);
5222         if (!zbd)
5223                 return 0;
5224 
5225         /* report zone for the sector the curseg points to */
5226         zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
5227                 << log_sectors_per_block;
5228         err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5229                                   report_one_zone_cb, &zone);
5230         if (err != 1) {
5231                 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5232                          zbd->path, err);
5233                 return err;
5234         }
5235 
5236         if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5237                 return 0;
5238 
5239         /*
5240          * If the previous mount was unmounted safely, we can reuse the
5241          * current segments. Otherwise, allocate new sections.
5242          */
5243         if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
5244                 wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
5245                 wp_segno = GET_SEGNO(sbi, wp_block);
5246                 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
5247                 wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
5248 
5249                 if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
5250                                 wp_sector_off == 0)
5251                         return 0;
5252 
5253                 f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
5254                             "curseg[0x%x,0x%x] wp[0x%x,0x%x]", type, cs->segno,
5255                             cs->next_blkoff, wp_segno, wp_blkoff);
5256         }
5257 
5258         /* Allocate a new section if the current one is not fresh. */
5259         if (cs->next_blkoff ||
5260             cs->segno != GET_SEG_FROM_SEC(sbi, GET_ZONE_FROM_SEC(sbi, cs_section))) {
5261                 unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff;
5262 
5263                 f2fs_allocate_new_section(sbi, type, true);
5264                 f2fs_notice(sbi, "Assign new section to curseg[%d]: "
5265                                 "[0x%x,0x%x] -> [0x%x,0x%x]",
5266                                 type, old_segno, old_blkoff,
5267                                 cs->segno, cs->next_blkoff);
5268         }
5269 
5270         /* check consistency of the zone the curseg points to */
5271         if (check_zone_write_pointer(sbi, zbd, &zone))
5272                 return -EIO;
5273 
5274         /* check newly assigned zone */
5275         cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
5276         cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
5277 
5278         zbd = get_target_zoned_dev(sbi, cs_zone_block);
5279         if (!zbd)
5280                 return 0;
5281 
5282         zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
5283                 << log_sectors_per_block;
5284         err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5285                                   report_one_zone_cb, &zone);
5286         if (err != 1) {
5287                 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5288                          zbd->path, err);
5289                 return err;
5290         }
5291 
5292         if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5293                 return 0;
5294 
5295         if (zone.wp != zone.start) {
5296                 f2fs_notice(sbi,
5297                             "New zone for curseg[%d] is not yet discarded. "
5298                             "Reset the zone: curseg[0x%x,0x%x]",
5299                             type, cs->segno, cs->next_blkoff);
5300                 err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block,
5301                                         zone.len >> log_sectors_per_block);
5302                 if (err) {
5303                         f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5304                                  zbd->path, err);
5305                         return err;
5306                 }
5307         }
5308 
5309         return 0;
5310 }
5311 
5312 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5313 {
5314         int i, ret;
5315 
5316         for (i = 0; i < NR_PERSISTENT_LOG; i++) {
5317                 ret = fix_curseg_write_pointer(sbi, i);
5318                 if (ret)
5319                         return ret;
5320         }
5321 
5322         return 0;
5323 }
5324 
5325 struct check_zone_write_pointer_args {
5326         struct f2fs_sb_info *sbi;
5327         struct f2fs_dev_info *fdev;
5328 };
5329 
5330 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
5331                                       void *data)
5332 {
5333         struct check_zone_write_pointer_args *args;
5334 
5335         args = (struct check_zone_write_pointer_args *)data;
5336 
5337         return check_zone_write_pointer(args->sbi, args->fdev, zone);
5338 }
5339 
5340 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5341 {
5342         int i, ret;
5343         struct check_zone_write_pointer_args args;
5344 
5345         for (i = 0; i < sbi->s_ndevs; i++) {
5346                 if (!bdev_is_zoned(FDEV(i).bdev))
5347                         continue;
5348 
5349                 args.sbi = sbi;
5350                 args.fdev = &FDEV(i);
5351                 ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
5352                                           check_zone_write_pointer_cb, &args);
5353                 if (ret < 0)
5354                         return ret;
5355         }
5356 
5357         return 0;
5358 }
5359 
5360 /*
5361  * Return the number of usable blocks in a segment. The number of blocks
5362  * returned is always equal to the number of blocks in a segment for
5363  * segments fully contained within a sequential zone capacity or a
5364  * conventional zone. For segments partially contained in a sequential
5365  * zone capacity, the number of usable blocks up to the zone capacity
5366  * is returned. 0 is returned in all other cases.
5367  */
5368 static inline unsigned int f2fs_usable_zone_blks_in_seg(
5369                         struct f2fs_sb_info *sbi, unsigned int segno)
5370 {
5371         block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
5372         unsigned int secno;
5373 
5374         if (!sbi->unusable_blocks_per_sec)
5375                 return BLKS_PER_SEG(sbi);
5376 
5377         secno = GET_SEC_FROM_SEG(sbi, segno);
5378         seg_start = START_BLOCK(sbi, segno);
5379         sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
5380         sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
5381 
5382         /*
5383          * If the segment starts before the zone capacity and spans
5384          * beyond it, the usable blocks run from the segment start to
5385          * the zone capacity. If the segment starts after the zone
5386          * capacity, there are no usable blocks.
5387          */
5388         if (seg_start >= sec_cap_blkaddr)
5389                 return 0;
5390         if (seg_start + BLKS_PER_SEG(sbi) > sec_cap_blkaddr)
5391                 return sec_cap_blkaddr - seg_start;
5392 
5393         return BLKS_PER_SEG(sbi);
5394 }
5395 #else
5396 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5397 {
5398         return 0;
5399 }
5400 
5401 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5402 {
5403         return 0;
5404 }
5405 
5406 static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
5407                                                         unsigned int segno)
5408 {
5409         return 0;
5410 }
5411 
5412 #endif
5413 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
5414                                         unsigned int segno)
5415 {
5416         if (f2fs_sb_has_blkzoned(sbi))
5417                 return f2fs_usable_zone_blks_in_seg(sbi, segno);
5418 
5419         return BLKS_PER_SEG(sbi);
5420 }
5421 
5422 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
5423                                         unsigned int segno)
5424 {
5425         if (f2fs_sb_has_blkzoned(sbi))
5426                 return CAP_SEGS_PER_SEC(sbi);
5427 
5428         return SEGS_PER_SEC(sbi);
5429 }
5430 
5431 /*
5432  * Update min, max modified time for cost-benefit GC algorithm
5433  */
5434 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
5435 {
5436         struct sit_info *sit_i = SIT_I(sbi);
5437         unsigned int segno;
5438 
5439         down_write(&sit_i->sentry_lock);
5440 
5441         sit_i->min_mtime = ULLONG_MAX;
5442 
5443         for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
5444                 unsigned int i;
5445                 unsigned long long mtime = 0;
5446 
5447                 for (i = 0; i < SEGS_PER_SEC(sbi); i++)
5448                         mtime += get_seg_entry(sbi, segno + i)->mtime;
5449 
5450                 mtime = div_u64(mtime, SEGS_PER_SEC(sbi));
5451 
5452                 if (sit_i->min_mtime > mtime)
5453                         sit_i->min_mtime = mtime;
5454         }
5455         sit_i->max_mtime = get_mtime(sbi, false);
5456         sit_i->dirty_max_mtime = 0;
5457         up_write(&sit_i->sentry_lock);
5458 }
5459 
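/*
 * Mount-time entry point of the segment manager: fill f2fs_sm_info from the
 * raw superblock and checkpoint, set up the flush and discard command
 * control, then build the SIT info, free segmap, cursegs, SIT entries and
 * dirty segmap, sanity-check the cursegs and initialize the min/max mtimes
 * used by GC.
 */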
5460 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
5461 {
5462         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
5463         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
5464         struct f2fs_sm_info *sm_info;
5465         int err;
5466 
5467         sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
5468         if (!sm_info)
5469                 return -ENOMEM;
5470 
5471         /* init sm info */
5472         sbi->sm_info = sm_info;
5473         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
5474         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
5475         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
5476         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
5477         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
5478         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
5479         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
5480         sm_info->rec_prefree_segments = sm_info->main_segments *
5481                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
5482         if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
5483                 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
5484 
5485         if (!f2fs_lfs_mode(sbi))
5486                 sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC);
5487         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
5488         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
5489         sm_info->min_seq_blocks = BLKS_PER_SEG(sbi);
5490         sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
5491         sm_info->min_ssr_sections = reserved_sections(sbi);
5492 
5493         INIT_LIST_HEAD(&sm_info->sit_entry_set);
5494 
5495         init_f2fs_rwsem(&sm_info->curseg_lock);
5496 
5497         err = f2fs_create_flush_cmd_control(sbi);
5498         if (err)
5499                 return err;
5500 
5501         err = create_discard_cmd_control(sbi);
5502         if (err)
5503                 return err;
5504 
5505         err = build_sit_info(sbi);
5506         if (err)
5507                 return err;
5508         err = build_free_segmap(sbi);
5509         if (err)
5510                 return err;
5511         err = build_curseg(sbi);
5512         if (err)
5513                 return err;
5514 
5515         /* reinit free segmap based on SIT */
5516         err = build_sit_entries(sbi);
5517         if (err)
5518                 return err;
5519 
5520         init_free_segmap(sbi);
5521         err = build_dirty_segmap(sbi);
5522         if (err)
5523                 return err;
5524 
5525         err = sanity_check_curseg(sbi);
5526         if (err)
5527                 return err;
5528 
5529         init_min_max_mtime(sbi);
5530         return 0;
5531 }
5532 
5533 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
5534                 enum dirty_type dirty_type)
5535 {
5536         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5537 
5538         mutex_lock(&dirty_i->seglist_lock);
5539         kvfree(dirty_i->dirty_segmap[dirty_type]);
5540         dirty_i->nr_dirty[dirty_type] = 0;
5541         mutex_unlock(&dirty_i->seglist_lock);
5542 }
5543 
5544 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
5545 {
5546         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5547 
5548         kvfree(dirty_i->pinned_secmap);
5549         kvfree(dirty_i->victim_secmap);
5550 }
5551 
5552 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
5553 {
5554         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5555         int i;
5556 
5557         if (!dirty_i)
5558                 return;
5559 
5560         /* discard pre-free/dirty segments list */
5561         for (i = 0; i < NR_DIRTY_TYPE; i++)
5562                 discard_dirty_segmap(sbi, i);
5563 
5564         if (__is_large_section(sbi)) {
5565                 mutex_lock(&dirty_i->seglist_lock);
5566                 kvfree(dirty_i->dirty_secmap);
5567                 mutex_unlock(&dirty_i->seglist_lock);
5568         }
5569 
5570         destroy_victim_secmap(sbi);
5571         SM_I(sbi)->dirty_info = NULL;
5572         kfree(dirty_i);
5573 }
5574 
5575 static void destroy_curseg(struct f2fs_sb_info *sbi)
5576 {
5577         struct curseg_info *array = SM_I(sbi)->curseg_array;
5578         int i;
5579 
5580         if (!array)
5581                 return;
5582         SM_I(sbi)->curseg_array = NULL;
5583         for (i = 0; i < NR_CURSEG_TYPE; i++) {
5584                 kfree(array[i].sum_blk);
5585                 kfree(array[i].journal);
5586         }
5587         kfree(array);
5588 }
5589 
5590 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
5591 {
5592         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
5593 
5594         if (!free_i)
5595                 return;
5596         SM_I(sbi)->free_info = NULL;
5597         kvfree(free_i->free_segmap);
5598         kvfree(free_i->free_secmap);
5599         kfree(free_i);
5600 }
5601 
5602 static void destroy_sit_info(struct f2fs_sb_info *sbi)
5603 {
5604         struct sit_info *sit_i = SIT_I(sbi);
5605 
5606         if (!sit_i)
5607                 return;
5608 
5609         if (sit_i->sentries)
5610                 kvfree(sit_i->bitmap);
5611         kfree(sit_i->tmp_map);
5612 
5613         kvfree(sit_i->sentries);
5614         kvfree(sit_i->sec_entries);
5615         kvfree(sit_i->dirty_sentries_bitmap);
5616 
5617         SM_I(sbi)->sit_info = NULL;
5618         kvfree(sit_i->sit_bitmap);
5619 #ifdef CONFIG_F2FS_CHECK_FS
5620         kvfree(sit_i->sit_bitmap_mir);
5621         kvfree(sit_i->invalid_segmap);
5622 #endif
5623         kfree(sit_i);
5624 }
5625 
5626 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
5627 {
5628         struct f2fs_sm_info *sm_info = SM_I(sbi);
5629 
5630         if (!sm_info)
5631                 return;
5632         f2fs_destroy_flush_cmd_control(sbi, true);
5633         destroy_discard_cmd_control(sbi);
5634         destroy_dirty_segmap(sbi);
5635         destroy_curseg(sbi);
5636         destroy_free_segmap(sbi);
5637         destroy_sit_info(sbi);
5638         sbi->sm_info = NULL;
5639         kfree(sm_info);
5640 }
5641 
5642 int __init f2fs_create_segment_manager_caches(void)
5643 {
5644         discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
5645                         sizeof(struct discard_entry));
5646         if (!discard_entry_slab)
5647                 goto fail;
5648 
5649         discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
5650                         sizeof(struct discard_cmd));
5651         if (!discard_cmd_slab)
5652                 goto destroy_discard_entry;
5653 
5654         sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
5655                         sizeof(struct sit_entry_set));
5656         if (!sit_entry_set_slab)
5657                 goto destroy_discard_cmd;
5658 
5659         revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry",
5660                         sizeof(struct revoke_entry));
5661         if (!revoke_entry_slab)
5662                 goto destroy_sit_entry_set;
5663         return 0;
5664 
5665 destroy_sit_entry_set:
5666         kmem_cache_destroy(sit_entry_set_slab);
5667 destroy_discard_cmd:
5668         kmem_cache_destroy(discard_cmd_slab);
5669 destroy_discard_entry:
5670         kmem_cache_destroy(discard_entry_slab);
5671 fail:
5672         return -ENOMEM;
5673 }
5674 
5675 void f2fs_destroy_segment_manager_caches(void)
5676 {
5677         kmem_cache_destroy(sit_entry_set_slab);
5678         kmem_cache_destroy(discard_cmd_slab);
5679         kmem_cache_destroy(discard_entry_slab);
5680         kmem_cache_destroy(revoke_entry_slab);
5681 }
5682 
