
TOMOYO Linux Cross Reference
Linux/fs/f2fs/file.c


// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/file.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/stat.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/falloc.h>
#include <linux/types.h>
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <linux/mount.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
#include <linux/uuid.h>
#include <linux/file.h>
#include <linux/nls.h>
#include <linux/sched/signal.h>
#include <linux/fileattr.h>
#include <linux/fadvise.h>
#include <linux/iomap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "acl.h"
#include "gc.h"
#include "iostat.h"
#include <trace/events/f2fs.h>
#include <uapi/linux/f2fs.h>

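/*
 * Read fault handler: let the generic filemap code service the fault, then
 * account the mapped read for iostat if a page was actually brought in.
 */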
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
{
        struct inode *inode = file_inode(vmf->vma->vm_file);
        vm_flags_t flags = vmf->vma->vm_flags;
        vm_fault_t ret;

        ret = filemap_fault(vmf);
        if (ret & VM_FAULT_LOCKED)
                f2fs_update_iostat(F2FS_I_SB(inode), inode,
                                        APP_MAPPED_READ_IO, F2FS_BLKSIZE);

        trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret);

        return ret;
}

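/*
 * Write fault handler: make the faulting page writable by reserving or
 * looking up its block, zeroing any part beyond EOF, and dirtying the page.
 */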
static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
{
        struct page *page = vmf->page;
        struct inode *inode = file_inode(vmf->vma->vm_file);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct dnode_of_data dn;
        bool need_alloc = !f2fs_is_pinned_file(inode);
        int err = 0;
        vm_fault_t ret;

        if (unlikely(IS_IMMUTABLE(inode)))
                return VM_FAULT_SIGBUS;

        if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
                err = -EIO;
                goto out;
        }

        if (unlikely(f2fs_cp_error(sbi))) {
                err = -EIO;
                goto out;
        }

        if (!f2fs_is_checkpoint_ready(sbi)) {
                err = -ENOSPC;
                goto out;
        }

        err = f2fs_convert_inline_inode(inode);
        if (err)
                goto out;

#ifdef CONFIG_F2FS_FS_COMPRESSION
        if (f2fs_compressed_file(inode)) {
                int ret = f2fs_is_compressed_cluster(inode, page->index);

                if (ret < 0) {
                        err = ret;
                        goto out;
                } else if (ret) {
                        need_alloc = false;
                }
        }
#endif
        /* should be done outside of any locked page */
        if (need_alloc)
                f2fs_balance_fs(sbi, true);

        sb_start_pagefault(inode->i_sb);

        f2fs_bug_on(sbi, f2fs_has_inline_data(inode));

        file_update_time(vmf->vma->vm_file);
        filemap_invalidate_lock_shared(inode->i_mapping);
        lock_page(page);
        if (unlikely(page->mapping != inode->i_mapping ||
                        page_offset(page) > i_size_read(inode) ||
                        !PageUptodate(page))) {
                unlock_page(page);
                err = -EFAULT;
                goto out_sem;
        }

        set_new_dnode(&dn, inode, NULL, NULL, 0);
        if (need_alloc) {
                /* block allocation */
                err = f2fs_get_block_locked(&dn, page->index);
        } else {
                err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
                f2fs_put_dnode(&dn);
                if (f2fs_is_pinned_file(inode) &&
                    !__is_valid_data_blkaddr(dn.data_blkaddr))
                        err = -EIO;
        }

        if (err) {
                unlock_page(page);
                goto out_sem;
        }

        f2fs_wait_on_page_writeback(page, DATA, false, true);

        /* wait for GCed page writeback via META_MAPPING */
        f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);

        /*
         * check to see if the page is mapped already (no holes)
         */
        if (PageMappedToDisk(page))
                goto out_sem;

        /* page is wholly or partially inside EOF */
        if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
                                                i_size_read(inode)) {
                loff_t offset;

                offset = i_size_read(inode) & ~PAGE_MASK;
                zero_user_segment(page, offset, PAGE_SIZE);
        }
        set_page_dirty(page);

        f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
        f2fs_update_time(sbi, REQ_TIME);

out_sem:
        filemap_invalidate_unlock_shared(inode->i_mapping);

        sb_end_pagefault(inode->i_sb);
out:
        ret = vmf_fs_error(err);

        trace_f2fs_vm_page_mkwrite(inode, page->index, vmf->vma->vm_flags, ret);
        return ret;
}

static const struct vm_operations_struct f2fs_file_vm_ops = {
        .fault          = f2fs_filemap_fault,
        .map_pages      = filemap_map_pages,
        .page_mkwrite   = f2fs_vm_page_mkwrite,
};

static int get_parent_ino(struct inode *inode, nid_t *pino)
{
        struct dentry *dentry;

        /*
         * Make sure to get the non-deleted alias.  The alias associated with
         * the open file descriptor being fsync()'ed may be deleted already.
         */
        dentry = d_find_alias(inode);
        if (!dentry)
                return 0;

        *pino = d_parent_ino(dentry);
        dput(dentry);
        return 1;
}

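/*
 * Decide whether this fsync can rely on roll-forward recovery alone
 * (CP_NO_NEEDED) or must trigger a full checkpoint, recording the reason.
 */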
static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        enum cp_reason_type cp_reason = CP_NO_NEEDED;

        if (!S_ISREG(inode->i_mode))
                cp_reason = CP_NON_REGULAR;
        else if (f2fs_compressed_file(inode))
                cp_reason = CP_COMPRESSED;
        else if (inode->i_nlink != 1)
                cp_reason = CP_HARDLINK;
        else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
                cp_reason = CP_SB_NEED_CP;
        else if (file_wrong_pino(inode))
                cp_reason = CP_WRONG_PINO;
        else if (!f2fs_space_for_roll_forward(sbi))
                cp_reason = CP_NO_SPC_ROLL;
        else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
                cp_reason = CP_NODE_NEED_CP;
        else if (test_opt(sbi, FASTBOOT))
                cp_reason = CP_FASTBOOT_MODE;
        else if (F2FS_OPTION(sbi).active_logs == 2)
                cp_reason = CP_SPEC_LOG_NUM;
        else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
                f2fs_need_dentry_mark(sbi, inode->i_ino) &&
                f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
                                                        TRANS_DIR_INO))
                cp_reason = CP_RECOVER_DIR;
        else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
                                                        XATTR_DIR_INO))
                cp_reason = CP_XATTR_DIR;

        return cp_reason;
}

static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
{
        struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
        bool ret = false;
        /* but we need to make sure there are no pending inode updates */
        if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino))
                ret = true;
        f2fs_put_page(i, 0);
        return ret;
}

static void try_to_fix_pino(struct inode *inode)
{
        struct f2fs_inode_info *fi = F2FS_I(inode);
        nid_t pino;

        f2fs_down_write(&fi->i_sem);
        if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
                        get_parent_ino(inode, &pino)) {
                f2fs_i_pino_write(inode, pino);
                file_got_pino(inode);
        }
        f2fs_up_write(&fi->i_sem);
}

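/*
 * Core of fsync()/fdatasync(): write back dirty data, then either take a
 * full checkpoint (when need_do_checkpoint() reports a reason) or persist
 * the node chain for roll-forward recovery, and finally issue a flush when
 * the fsync mode requires one.
 */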
static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
                                                int datasync, bool atomic)
{
        struct inode *inode = file->f_mapping->host;
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        nid_t ino = inode->i_ino;
        int ret = 0;
        enum cp_reason_type cp_reason = 0;
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_ALL,
                .nr_to_write = LONG_MAX,
                .for_reclaim = 0,
        };
        unsigned int seq_id = 0;

        if (unlikely(f2fs_readonly(inode->i_sb)))
                return 0;

        trace_f2fs_sync_file_enter(inode);

        if (S_ISDIR(inode->i_mode))
                goto go_write;

        /* if fdatasync is triggered, let's do in-place-update */
        if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
                set_inode_flag(inode, FI_NEED_IPU);
        ret = file_write_and_wait_range(file, start, end);
        clear_inode_flag(inode, FI_NEED_IPU);

        if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
                trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
                return ret;
        }

        /* if the inode is dirty, let's recover all the time */
        if (!f2fs_skip_inode_update(inode, datasync)) {
                f2fs_write_inode(inode, NULL);
                goto go_write;
        }

        /*
         * if there is no written data, don't waste time writing recovery info.
         */
        if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
                        !f2fs_exist_written_data(sbi, ino, APPEND_INO)) {

                /* it may call write_inode just prior to fsync */
                if (need_inode_page_update(sbi, ino))
                        goto go_write;

                if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
                                f2fs_exist_written_data(sbi, ino, UPDATE_INO))
                        goto flush_out;
                goto out;
        } else {
                /*
                 * In the OPU case, during fsync(), the node can be persisted
                 * before the data when the lower device doesn't support write
                 * barriers, resulting in data corruption after a sudden
                 * power-off (SPO). So for strict fsync mode, force atomic
                 * write semantics to keep the write order between data/node
                 * and the last node, to avoid potential data corruption.
                 */
                if (F2FS_OPTION(sbi).fsync_mode ==
                                FSYNC_MODE_STRICT && !atomic)
                        atomic = true;
        }
go_write:
        /*
         * Both fdatasync() and fsync() are able to be recovered from a
         * sudden power-off.
         */
        f2fs_down_read(&F2FS_I(inode)->i_sem);
        cp_reason = need_do_checkpoint(inode);
        f2fs_up_read(&F2FS_I(inode)->i_sem);

        if (cp_reason) {
                /* all the dirty node pages should be flushed for POR */
                ret = f2fs_sync_fs(inode->i_sb, 1);

                /*
                 * We've secured consistency through sync_fs. The following
                 * pino will be used only for fsynced inodes after checkpoint.
                 */
                try_to_fix_pino(inode);
                clear_inode_flag(inode, FI_APPEND_WRITE);
                clear_inode_flag(inode, FI_UPDATE_WRITE);
                goto out;
        }
sync_nodes:
        atomic_inc(&sbi->wb_sync_req[NODE]);
        ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
        atomic_dec(&sbi->wb_sync_req[NODE]);
        if (ret)
                goto out;

        /* if cp_error was enabled, we should avoid an infinite loop */
        if (unlikely(f2fs_cp_error(sbi))) {
                ret = -EIO;
                goto out;
        }

        if (f2fs_need_inode_block_update(sbi, ino)) {
                f2fs_mark_inode_dirty_sync(inode, true);
                f2fs_write_inode(inode, NULL);
                goto sync_nodes;
        }

        /*
         * If it's an atomic_write, it's fine to keep the write ordering, so
         * we don't need to wait for node write completion here: the node
         * chain serializes the node blocks. If one of the node writes is
         * reordered, we simply see a broken chain and stop roll-forward
         * recovery. It means we'll recover all or none of the node blocks
         * given the fsync mark.
         */
        if (!atomic) {
                ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
                if (ret)
                        goto out;
        }

        /* once recovery info is written, we don't need to track this */
        f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
        clear_inode_flag(inode, FI_APPEND_WRITE);
flush_out:
        if ((!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) ||
            (atomic && !test_opt(sbi, NOBARRIER) && f2fs_sb_has_blkzoned(sbi)))
                ret = f2fs_issue_flush(sbi, inode->i_ino);
        if (!ret) {
                f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
                clear_inode_flag(inode, FI_UPDATE_WRITE);
                f2fs_remove_ino_entry(sbi, ino, FLUSH_INO);
        }
        f2fs_update_time(sbi, REQ_TIME);
out:
        trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
        return ret;
}

int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
        if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
                return -EIO;
        return f2fs_do_sync_file(file, start, end, datasync, false);
}

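/*
 * Helper for lseek(): report whether the block at @index satisfies the
 * SEEK_DATA/SEEK_HOLE query, treating dirty in-memory pages and compressed
 * clusters as data.
 */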
static bool __found_offset(struct address_space *mapping,
                struct dnode_of_data *dn, pgoff_t index, int whence)
{
        block_t blkaddr = f2fs_data_blkaddr(dn);
        struct inode *inode = mapping->host;
        bool compressed_cluster = false;

        if (f2fs_compressed_file(inode)) {
                block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page,
                    ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size));

                compressed_cluster = first_blkaddr == COMPRESS_ADDR;
        }

        switch (whence) {
        case SEEK_DATA:
                if (__is_valid_data_blkaddr(blkaddr))
                        return true;
                if (blkaddr == NEW_ADDR &&
                    xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
                        return true;
                if (compressed_cluster)
                        return true;
                break;
        case SEEK_HOLE:
                if (compressed_cluster)
                        return false;
                if (blkaddr == NULL_ADDR)
                        return true;
                break;
        }
        return false;
}

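/*
 * Walk the dnode blocks from @offset and return the file position of the
 * next data or hole, as selected by @whence.
 */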
static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
{
        struct inode *inode = file->f_mapping->host;
        loff_t maxbytes = inode->i_sb->s_maxbytes;
        struct dnode_of_data dn;
        pgoff_t pgofs, end_offset;
        loff_t data_ofs = offset;
        loff_t isize;
        int err = 0;

        inode_lock_shared(inode);

        isize = i_size_read(inode);
        if (offset >= isize)
                goto fail;

        /* handle inline data case */
        if (f2fs_has_inline_data(inode)) {
                if (whence == SEEK_HOLE) {
                        data_ofs = isize;
                        goto found;
                } else if (whence == SEEK_DATA) {
                        data_ofs = offset;
                        goto found;
                }
        }

        pgofs = (pgoff_t)(offset >> PAGE_SHIFT);

        for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
                set_new_dnode(&dn, inode, NULL, NULL, 0);
                err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
                if (err && err != -ENOENT) {
                        goto fail;
                } else if (err == -ENOENT) {
                        /* direct node does not exist */
                        if (whence == SEEK_DATA) {
                                pgofs = f2fs_get_next_page_offset(&dn, pgofs);
                                continue;
                        } else {
                                goto found;
                        }
                }

                end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

                /* find data/hole in dnode block */
                for (; dn.ofs_in_node < end_offset;
                                dn.ofs_in_node++, pgofs++,
                                data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
                        block_t blkaddr;

                        blkaddr = f2fs_data_blkaddr(&dn);

                        if (__is_valid_data_blkaddr(blkaddr) &&
                                !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
                                        blkaddr, DATA_GENERIC_ENHANCE)) {
                                f2fs_put_dnode(&dn);
                                goto fail;
                        }

                        if (__found_offset(file->f_mapping, &dn,
                                                        pgofs, whence)) {
                                f2fs_put_dnode(&dn);
                                goto found;
                        }
                }
                f2fs_put_dnode(&dn);
        }

        if (whence == SEEK_DATA)
                goto fail;
found:
        if (whence == SEEK_HOLE && data_ofs > isize)
                data_ofs = isize;
        inode_unlock_shared(inode);
        return vfs_setpos(file, data_ofs, maxbytes);
fail:
        inode_unlock_shared(inode);
        return -ENXIO;
}

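/*
 * llseek entry point. SEEK_SET/CUR/END go through the generic helper, while
 * SEEK_DATA/SEEK_HOLE are resolved by f2fs_seek_block() above. A purely
 * illustrative userspace sketch (not part of this file):
 *
 *	off_t data = lseek(fd, 0, SEEK_DATA);		// start of first data extent
 *	off_t hole = lseek(fd, data, SEEK_HOLE);	// end of that extent
 */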
static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
{
        struct inode *inode = file->f_mapping->host;
        loff_t maxbytes = inode->i_sb->s_maxbytes;

        if (f2fs_compressed_file(inode))
                maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;

        switch (whence) {
        case SEEK_SET:
        case SEEK_CUR:
        case SEEK_END:
                return generic_file_llseek_size(file, offset, whence,
                                                maxbytes, i_size_read(inode));
        case SEEK_DATA:
        case SEEK_HOLE:
                if (offset < 0)
                        return -ENXIO;
                return f2fs_seek_block(file, offset, whence);
        }

        return -EINVAL;
}

static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct inode *inode = file_inode(file);

        if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
                return -EIO;

        if (!f2fs_is_compress_backend_ready(inode))
                return -EOPNOTSUPP;

        file_accessed(file);
        vma->vm_ops = &f2fs_file_vm_ops;

        f2fs_down_read(&F2FS_I(inode)->i_sem);
        set_inode_flag(inode, FI_MMAP_FILE);
        f2fs_up_read(&F2FS_I(inode)->i_sem);

        return 0;
}

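/*
 * On the first open of an inode, truncate any preallocated blocks left
 * beyond i_size (flagged via file_should_truncate()) before marking the
 * file as opened.
 */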
static int finish_preallocate_blocks(struct inode *inode)
{
        int ret;

        inode_lock(inode);
        if (is_inode_flag_set(inode, FI_OPENED_FILE)) {
                inode_unlock(inode);
                return 0;
        }

        if (!file_should_truncate(inode)) {
                set_inode_flag(inode, FI_OPENED_FILE);
                inode_unlock(inode);
                return 0;
        }

        f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
        filemap_invalidate_lock(inode->i_mapping);

        truncate_setsize(inode, i_size_read(inode));
        ret = f2fs_truncate(inode);

        filemap_invalidate_unlock(inode->i_mapping);
        f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

        if (!ret)
                set_inode_flag(inode, FI_OPENED_FILE);

        inode_unlock(inode);
        if (ret)
                return ret;

        file_dont_truncate(inode);
        return 0;
}

static int f2fs_file_open(struct inode *inode, struct file *filp)
{
        int err = fscrypt_file_open(inode, filp);

        if (err)
                return err;

        if (!f2fs_is_compress_backend_ready(inode))
                return -EOPNOTSUPP;

        err = fsverity_file_open(inode, filp);
        if (err)
                return err;

        filp->f_mode |= FMODE_NOWAIT;
        filp->f_mode |= FMODE_CAN_ODIRECT;

        err = dquot_file_open(inode, filp);
        if (err)
                return err;

        return finish_preallocate_blocks(inode);
}

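/*
 * Invalidate @count block addresses starting at dn->ofs_in_node, updating
 * compressed-cluster accounting, the extent caches and the valid block
 * count as it goes.
 */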
void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
        int nr_free = 0, ofs = dn->ofs_in_node, len = count;
        __le32 *addr;
        bool compressed_cluster = false;
        int cluster_index = 0, valid_blocks = 0;
        int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
        bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);

        addr = get_dnode_addr(dn->inode, dn->node_page) + ofs;

        /* Assumption: truncation starts at a cluster boundary */
        for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) {
                block_t blkaddr = le32_to_cpu(*addr);

                if (f2fs_compressed_file(dn->inode) &&
                                        !(cluster_index & (cluster_size - 1))) {
                        if (compressed_cluster)
                                f2fs_i_compr_blocks_update(dn->inode,
                                                        valid_blocks, false);
                        compressed_cluster = (blkaddr == COMPRESS_ADDR);
                        valid_blocks = 0;
                }

                if (blkaddr == NULL_ADDR)
                        continue;

                f2fs_set_data_blkaddr(dn, NULL_ADDR);

                if (__is_valid_data_blkaddr(blkaddr)) {
                        if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
                                continue;
                        if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
                                                DATA_GENERIC_ENHANCE))
                                continue;
                        if (compressed_cluster)
                                valid_blocks++;
                }

                f2fs_invalidate_blocks(sbi, blkaddr);

                if (!released || blkaddr != COMPRESS_ADDR)
                        nr_free++;
        }

        if (compressed_cluster)
                f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false);

        if (nr_free) {
                pgoff_t fofs;
                /*
                 * once we invalidate a valid blkaddr in range [ofs, ofs + count],
                 * we will invalidate all blkaddrs in the whole range.
                 */
                fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
                                                        dn->inode) + ofs;
                f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
                f2fs_update_age_extent_cache_range(dn, fofs, len);
                dec_valid_block_count(sbi, dn->inode, nr_free);
        }
        dn->ofs_in_node = ofs;

        f2fs_update_time(sbi, REQ_TIME);
        trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
                                         dn->ofs_in_node, nr_free);
}

static int truncate_partial_data_page(struct inode *inode, u64 from,
                                                                bool cache_only)
{
        loff_t offset = from & (PAGE_SIZE - 1);
        pgoff_t index = from >> PAGE_SHIFT;
        struct address_space *mapping = inode->i_mapping;
        struct page *page;

        if (!offset && !cache_only)
                return 0;

        if (cache_only) {
                page = find_lock_page(mapping, index);
                if (page && PageUptodate(page))
                        goto truncate_out;
                f2fs_put_page(page, 1);
                return 0;
        }

        page = f2fs_get_lock_data_page(inode, index, true);
        if (IS_ERR(page))
                return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
truncate_out:
        f2fs_wait_on_page_writeback(page, DATA, true, true);
        zero_user(page, offset, PAGE_SIZE - offset);

        /* An encrypted inode should have a key and truncate the last page. */
        f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode));
        if (!cache_only)
                set_page_dirty(page);
        f2fs_put_page(page, 1);
        return 0;
}

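/*
 * Truncate all blocks past @from: handle inline data, free the tail of the
 * dnode containing @from, drop the indirect node blocks after it, and
 * lastly zero the partial page at the new end of file.
 */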
int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct dnode_of_data dn;
        pgoff_t free_from;
        int count = 0, err = 0;
        struct page *ipage;
        bool truncate_page = false;

        trace_f2fs_truncate_blocks_enter(inode, from);

        free_from = (pgoff_t)F2FS_BLK_ALIGN(from);

        if (free_from >= max_file_blocks(inode))
                goto free_partial;

        if (lock)
                f2fs_lock_op(sbi);

        ipage = f2fs_get_node_page(sbi, inode->i_ino);
        if (IS_ERR(ipage)) {
                err = PTR_ERR(ipage);
                goto out;
        }

        if (f2fs_has_inline_data(inode)) {
                f2fs_truncate_inline_inode(inode, ipage, from);
                f2fs_put_page(ipage, 1);
                truncate_page = true;
                goto out;
        }

        set_new_dnode(&dn, inode, ipage, NULL, 0);
        err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
        if (err) {
                if (err == -ENOENT)
                        goto free_next;
                goto out;
        }

        count = ADDRS_PER_PAGE(dn.node_page, inode);

        count -= dn.ofs_in_node;
        f2fs_bug_on(sbi, count < 0);

        if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
                f2fs_truncate_data_blocks_range(&dn, count);
                free_from += count;
        }

        f2fs_put_dnode(&dn);
free_next:
        err = f2fs_truncate_inode_blocks(inode, free_from);
out:
        if (lock)
                f2fs_unlock_op(sbi);
free_partial:
        /* lastly zero out the partial data page */
        if (!err)
                err = truncate_partial_data_page(inode, from, truncate_page);

        trace_f2fs_truncate_blocks_exit(inode, err);
        return err;
}

int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
        u64 free_from = from;
        int err;

#ifdef CONFIG_F2FS_FS_COMPRESSION
        /*
         * for compressed files, only cluster-size aligned truncation
         * is supported.
         */
        if (f2fs_compressed_file(inode))
                free_from = round_up(from,
                                F2FS_I(inode)->i_cluster_size << PAGE_SHIFT);
#endif

        err = f2fs_do_truncate_blocks(inode, free_from, lock);
        if (err)
                return err;

#ifdef CONFIG_F2FS_FS_COMPRESSION
        /*
         * For compressed files, direct writes are disallowed once the
         * compressed blocks have been released, but they should be allowed
         * again after the file is truncated to zero.
         */
        if (f2fs_compressed_file(inode) && !free_from
                        && is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
                clear_inode_flag(inode, FI_COMPRESS_RELEASED);

        if (from != free_from) {
                err = f2fs_truncate_partial_cluster(inode, from, lock);
                if (err)
                        return err;
        }
#endif

        return 0;
}

int f2fs_truncate(struct inode *inode)
{
        int err;

        if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
                return -EIO;

        if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
                                S_ISLNK(inode->i_mode)))
                return 0;

        trace_f2fs_truncate(inode);

        if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE))
                return -EIO;

        err = f2fs_dquot_initialize(inode);
        if (err)
                return err;

        /* we should check inline_data size */
        if (!f2fs_may_inline_data(inode)) {
                err = f2fs_convert_inline_inode(inode);
                if (err)
                        return err;
        }

        err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
        if (err)
                return err;

        inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
        f2fs_mark_inode_dirty_sync(inode, false);
        return 0;
}

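/*
 * Return true when direct IO must fall back to buffered IO: encryption
 * without DIO support, fs-verity, compression, inline data, misaligned
 * multi-device layouts, zoned-device writes and a disabled checkpoint all
 * force the buffered path.
 */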
static bool f2fs_force_buffered_io(struct inode *inode, int rw)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

        if (!fscrypt_dio_supported(inode))
                return true;
        if (fsverity_active(inode))
                return true;
        if (f2fs_compressed_file(inode))
                return true;
        if (f2fs_has_inline_data(inode))
                return true;

        /* disallow direct IO if any device has an unaligned blksize */
        if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
                return true;
        /*
         * for zoned block devices, fall back from direct IO to buffered IO,
         * so that all IOs can be serialized by the log-structured write path.
         */
        if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) &&
            !f2fs_is_pinned_file(inode))
                return true;
        if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
                return true;

        return false;
}

int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path,
                 struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
        struct inode *inode = d_inode(path->dentry);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct f2fs_inode *ri = NULL;
        unsigned int flags;

        if (f2fs_has_extra_attr(inode) &&
                        f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
                        F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
                stat->result_mask |= STATX_BTIME;
                stat->btime.tv_sec = fi->i_crtime.tv_sec;
                stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
        }

        /*
         * Return the DIO alignment restrictions if requested.  We only return
         * this information when requested, since on encrypted files it might
         * take a fair bit of work to get if the file wasn't opened recently.
         *
         * f2fs sometimes supports DIO reads but not DIO writes.  STATX_DIOALIGN
         * cannot represent that, so in that case we report no DIO support.
         */
        if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
                unsigned int bsize = i_blocksize(inode);

                stat->result_mask |= STATX_DIOALIGN;
                if (!f2fs_force_buffered_io(inode, WRITE)) {
                        stat->dio_mem_align = bsize;
                        stat->dio_offset_align = bsize;
                }
        }

        flags = fi->i_flags;
        if (flags & F2FS_COMPR_FL)
                stat->attributes |= STATX_ATTR_COMPRESSED;
        if (flags & F2FS_APPEND_FL)
                stat->attributes |= STATX_ATTR_APPEND;
        if (IS_ENCRYPTED(inode))
                stat->attributes |= STATX_ATTR_ENCRYPTED;
        if (flags & F2FS_IMMUTABLE_FL)
                stat->attributes |= STATX_ATTR_IMMUTABLE;
        if (flags & F2FS_NODUMP_FL)
                stat->attributes |= STATX_ATTR_NODUMP;
        if (IS_VERITY(inode))
                stat->attributes |= STATX_ATTR_VERITY;

        stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
                                  STATX_ATTR_APPEND |
                                  STATX_ATTR_ENCRYPTED |
                                  STATX_ATTR_IMMUTABLE |
                                  STATX_ATTR_NODUMP |
                                  STATX_ATTR_VERITY);

        generic_fillattr(idmap, request_mask, inode, stat);

        /* we need to show initial sectors used for inline_data/dentries */
        if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
                                        f2fs_has_inline_dentry(inode))
                stat->blocks += (stat->size + 511) >> 9;

        return 0;
}

#ifdef CONFIG_F2FS_FS_POSIX_ACL
static void __setattr_copy(struct mnt_idmap *idmap,
                           struct inode *inode, const struct iattr *attr)
{
        unsigned int ia_valid = attr->ia_valid;

        i_uid_update(idmap, attr, inode);
        i_gid_update(idmap, attr, inode);
        if (ia_valid & ATTR_ATIME)
                inode_set_atime_to_ts(inode, attr->ia_atime);
        if (ia_valid & ATTR_MTIME)
                inode_set_mtime_to_ts(inode, attr->ia_mtime);
        if (ia_valid & ATTR_CTIME)
                inode_set_ctime_to_ts(inode, attr->ia_ctime);
        if (ia_valid & ATTR_MODE) {
                umode_t mode = attr->ia_mode;

                if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
                        mode &= ~S_ISGID;
                set_acl_inode(inode, mode);
        }
}
#else
#define __setattr_copy setattr_copy
#endif

int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
                 struct iattr *attr)
{
        struct inode *inode = d_inode(dentry);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        int err;

        if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
                return -EIO;

        if (unlikely(IS_IMMUTABLE(inode)))
                return -EPERM;

        if (unlikely(IS_APPEND(inode) &&
                        (attr->ia_valid & (ATTR_MODE | ATTR_UID |
                                  ATTR_GID | ATTR_TIMES_SET))))
                return -EPERM;

        if ((attr->ia_valid & ATTR_SIZE)) {
                if (!f2fs_is_compress_backend_ready(inode))
                        return -EOPNOTSUPP;
                if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) &&
                        !IS_ALIGNED(attr->ia_size,
                        F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
                        return -EINVAL;
        }

        err = setattr_prepare(idmap, dentry, attr);
        if (err)
                return err;

        err = fscrypt_prepare_setattr(dentry, attr);
        if (err)
                return err;

        err = fsverity_prepare_setattr(dentry, attr);
        if (err)
                return err;

        if (is_quota_modification(idmap, inode, attr)) {
                err = f2fs_dquot_initialize(inode);
                if (err)
                        return err;
        }
        if (i_uid_needs_update(idmap, attr, inode) ||
            i_gid_needs_update(idmap, attr, inode)) {
                f2fs_lock_op(F2FS_I_SB(inode));
                err = dquot_transfer(idmap, inode, attr);
                if (err) {
                        set_sbi_flag(F2FS_I_SB(inode),
                                        SBI_QUOTA_NEED_REPAIR);
                        f2fs_unlock_op(F2FS_I_SB(inode));
                        return err;
                }
                /*
                 * update uid/gid under lock_op(), so that dquot and inode can
                 * be updated atomically.
                 */
                i_uid_update(idmap, attr, inode);
                i_gid_update(idmap, attr, inode);
                f2fs_mark_inode_dirty_sync(inode, true);
                f2fs_unlock_op(F2FS_I_SB(inode));
        }

        if (attr->ia_valid & ATTR_SIZE) {
                loff_t old_size = i_size_read(inode);

                if (attr->ia_size > MAX_INLINE_DATA(inode)) {
                        /*
                         * convert the inline inode before i_size_write, so
                         * that an inode with the inline flag never exceeds
                         * the inline_data size.
                         */
                        err = f2fs_convert_inline_inode(inode);
                        if (err)
                                return err;
                }

                /*
                 * wait for in-flight dio; blocks should be removed only
                 * after IO completion.
                 */
                if (attr->ia_size < old_size)
                        inode_dio_wait(inode);

                f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
                filemap_invalidate_lock(inode->i_mapping);

                truncate_setsize(inode, attr->ia_size);

                if (attr->ia_size <= old_size)
                        err = f2fs_truncate(inode);
                /*
                 * do not trim all blocks after i_size if target size is
                 * larger than i_size.
                 */
                filemap_invalidate_unlock(inode->i_mapping);
                f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
                if (err)
                        return err;

                spin_lock(&fi->i_size_lock);
                inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
                fi->last_disk_size = i_size_read(inode);
                spin_unlock(&fi->i_size_lock);
        }

        __setattr_copy(idmap, inode, attr);

        if (attr->ia_valid & ATTR_MODE) {
                err = posix_acl_chmod(idmap, dentry, f2fs_get_inode_mode(inode));

                if (is_inode_flag_set(inode, FI_ACL_MODE)) {
                        if (!err)
                                inode->i_mode = fi->i_acl_mode;
                        clear_inode_flag(inode, FI_ACL_MODE);
                }
        }

        /* file size may have changed here */
        f2fs_mark_inode_dirty_sync(inode, true);

        /* inode change will produce dirty node pages flushed by checkpoint */
        f2fs_balance_fs(F2FS_I_SB(inode), true);

        return err;
}

const struct inode_operations f2fs_file_inode_operations = {
        .getattr        = f2fs_getattr,
        .setattr        = f2fs_setattr,
        .get_inode_acl  = f2fs_get_acl,
        .set_acl        = f2fs_set_acl,
        .listxattr      = f2fs_listxattr,
        .fiemap         = f2fs_fiemap,
        .fileattr_get   = f2fs_fileattr_get,
        .fileattr_set   = f2fs_fileattr_set,
};

static int fill_zero(struct inode *inode, pgoff_t index,
                                        loff_t start, loff_t len)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct page *page;

        if (!len)
                return 0;

        f2fs_balance_fs(sbi, true);

        f2fs_lock_op(sbi);
        page = f2fs_get_new_data_page(inode, NULL, index, false);
        f2fs_unlock_op(sbi);

        if (IS_ERR(page))
                return PTR_ERR(page);

        f2fs_wait_on_page_writeback(page, DATA, true, true);
        zero_user(page, start, len);
        set_page_dirty(page);
        f2fs_put_page(page, 1);
        return 0;
}

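/*
 * Free all block addresses in [pg_start, pg_end), one dnode block at a
 * time, skipping ranges whose direct node was never allocated.
 */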
int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
{
        int err;

        while (pg_start < pg_end) {
                struct dnode_of_data dn;
                pgoff_t end_offset, count;

                set_new_dnode(&dn, inode, NULL, NULL, 0);
                err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
                if (err) {
                        if (err == -ENOENT) {
                                pg_start = f2fs_get_next_page_offset(&dn,
                                                                pg_start);
                                continue;
                        }
                        return err;
                }

                end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
                count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);

                f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);

                f2fs_truncate_data_blocks_range(&dn, count);
                f2fs_put_dnode(&dn);

                pg_start += count;
        }
        return 0;
}

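/*
 * Back end of hole punching: zero the partial pages at both ends and free
 * the whole blocks in between. A purely illustrative userspace trigger
 * (not part of this file):
 *
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len);
 */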
static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
        pgoff_t pg_start, pg_end;
        loff_t off_start, off_end;
        int ret;

        ret = f2fs_convert_inline_inode(inode);
        if (ret)
                return ret;

        pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
        pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;

        off_start = offset & (PAGE_SIZE - 1);
        off_end = (offset + len) & (PAGE_SIZE - 1);

        if (pg_start == pg_end) {
                ret = fill_zero(inode, pg_start, off_start,
                                                off_end - off_start);
                if (ret)
                        return ret;
        } else {
                if (off_start) {
                        ret = fill_zero(inode, pg_start++, off_start,
                                                PAGE_SIZE - off_start);
                        if (ret)
                                return ret;
                }
                if (off_end) {
                        ret = fill_zero(inode, pg_end, 0, off_end);
                        if (ret)
                                return ret;
                }

                if (pg_start < pg_end) {
                        loff_t blk_start, blk_end;
                        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

                        f2fs_balance_fs(sbi, true);

                        blk_start = (loff_t)pg_start << PAGE_SHIFT;
                        blk_end = (loff_t)pg_end << PAGE_SHIFT;

                        f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
                        filemap_invalidate_lock(inode->i_mapping);

                        truncate_pagecache_range(inode, blk_start, blk_end - 1);

                        f2fs_lock_op(sbi);
                        ret = f2fs_truncate_hole(inode, pg_start, pg_end);
                        f2fs_unlock_op(sbi);

                        filemap_invalidate_unlock(inode->i_mapping);
                        f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
                }
        }

        return ret;
}

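/*
 * First phase of the block-exchange machinery behind collapse/insert
 * range: record the block addresses of @len blocks at @off, detaching
 * non-checkpointed blocks from the dnode (do_replace[i] = 1) so they can
 * be reattached at the destination.
 */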
static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
                                int *do_replace, pgoff_t off, pgoff_t len)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct dnode_of_data dn;
        int ret, done, i;

next_dnode:
        set_new_dnode(&dn, inode, NULL, NULL, 0);
        ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
        if (ret && ret != -ENOENT) {
                return ret;
        } else if (ret == -ENOENT) {
                if (dn.max_level == 0)
                        return -ENOENT;
                done = min((pgoff_t)ADDRS_PER_BLOCK(inode) -
                                                dn.ofs_in_node, len);
                blkaddr += done;
                do_replace += done;
                goto next;
        }

        done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) -
                                                        dn.ofs_in_node, len);
        for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
                *blkaddr = f2fs_data_blkaddr(&dn);

                if (__is_valid_data_blkaddr(*blkaddr) &&
                        !f2fs_is_valid_blkaddr(sbi, *blkaddr,
                                        DATA_GENERIC_ENHANCE)) {
                        f2fs_put_dnode(&dn);
                        return -EFSCORRUPTED;
                }

                if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {

                        if (f2fs_lfs_mode(sbi)) {
                                f2fs_put_dnode(&dn);
                                return -EOPNOTSUPP;
                        }

                        /* do not invalidate this block address */
                        f2fs_update_data_blkaddr(&dn, NULL_ADDR);
                        *do_replace = 1;
                }
        }
        f2fs_put_dnode(&dn);
next:
        len -= done;
        off += done;
        if (len)
                goto next_dnode;
        return 0;
}

static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
                                int *do_replace, pgoff_t off, int len)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct dnode_of_data dn;
        int ret, i;

        for (i = 0; i < len; i++, do_replace++, blkaddr++) {
                if (*do_replace == 0)
                        continue;

                set_new_dnode(&dn, inode, NULL, NULL, 0);
                ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
                if (ret) {
                        dec_valid_block_count(sbi, inode, 1);
                        f2fs_invalidate_blocks(sbi, *blkaddr);
                } else {
                        f2fs_update_data_blkaddr(&dn, *blkaddr);
                }
                f2fs_put_dnode(&dn);
        }
        return 0;
}

static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
                        block_t *blkaddr, int *do_replace,
                        pgoff_t src, pgoff_t dst, pgoff_t len, bool full)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);
        pgoff_t i = 0;
        int ret;

        while (i < len) {
                if (blkaddr[i] == NULL_ADDR && !full) {
                        i++;
                        continue;
                }

                if (do_replace[i] || blkaddr[i] == NULL_ADDR) {
                        struct dnode_of_data dn;
                        struct node_info ni;
                        size_t new_size;
                        pgoff_t ilen;

                        set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
                        ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
                        if (ret)
                                return ret;

                        ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
                        if (ret) {
                                f2fs_put_dnode(&dn);
                                return ret;
                        }

                        ilen = min((pgoff_t)
                                ADDRS_PER_PAGE(dn.node_page, dst_inode) -
                                                dn.ofs_in_node, len - i);
                        do {
                                dn.data_blkaddr = f2fs_data_blkaddr(&dn);
                                f2fs_truncate_data_blocks_range(&dn, 1);

                                if (do_replace[i]) {
                                        f2fs_i_blocks_write(src_inode,
                                                        1, false, false);
                                        f2fs_i_blocks_write(dst_inode,
                                                        1, true, false);
                                        f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
                                        blkaddr[i], ni.version, true, false);

                                        do_replace[i] = 0;
                                }
                                dn.ofs_in_node++;
                                i++;
                                new_size = (loff_t)(dst + i) << PAGE_SHIFT;
                                if (dst_inode->i_size < new_size)
                                        f2fs_i_size_write(dst_inode, new_size);
                        } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));

                        f2fs_put_dnode(&dn);
                } else {
                        struct page *psrc, *pdst;

                        psrc = f2fs_get_lock_data_page(src_inode,
                                                        src + i, true);
                        if (IS_ERR(psrc))
                                return PTR_ERR(psrc);
                        pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i,
                                                                true);
                        if (IS_ERR(pdst)) {
                                f2fs_put_page(psrc, 1);
                                return PTR_ERR(pdst);
                        }

                        f2fs_wait_on_page_writeback(pdst, DATA, true, true);

                        memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE);
                        set_page_dirty(pdst);
                        set_page_private_gcing(pdst);
                        f2fs_put_page(pdst, 1);
                        f2fs_put_page(psrc, 1);

                        ret = f2fs_truncate_hole(src_inode,
                                                src + i, src + i + 1);
                        if (ret)
                                return ret;
                        i++;
                }
        }
        return 0;
}

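/*
 * Move @len blocks from @src to @dst in chunks of up to four dnode blocks,
 * rolling the recorded addresses back if any step fails midway.
 */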
1403 static int __exchange_data_block(struct inode *src_inode,
1404                         struct inode *dst_inode, pgoff_t src, pgoff_t dst,
1405                         pgoff_t len, bool full)
1406 {
1407         block_t *src_blkaddr;
1408         int *do_replace;
1409         pgoff_t olen;
1410         int ret;
1411 
1412         while (len) {
1413                 olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);
1414 
1415                 src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
1416                                         array_size(olen, sizeof(block_t)),
1417                                         GFP_NOFS);
1418                 if (!src_blkaddr)
1419                         return -ENOMEM;
1420 
1421                 do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
1422                                         array_size(olen, sizeof(int)),
1423                                         GFP_NOFS);
1424                 if (!do_replace) {
1425                         kvfree(src_blkaddr);
1426                         return -ENOMEM;
1427                 }
1428 
1429                 ret = __read_out_blkaddrs(src_inode, src_blkaddr,
1430                                         do_replace, src, olen);
1431                 if (ret)
1432                         goto roll_back;
1433 
1434                 ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr,
1435                                         do_replace, src, dst, olen, full);
1436                 if (ret)
1437                         goto roll_back;
1438 
1439                 src += olen;
1440                 dst += olen;
1441                 len -= olen;
1442 
1443                 kvfree(src_blkaddr);
1444                 kvfree(do_replace);
1445         }
1446         return 0;
1447 
1448 roll_back:
1449         __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen);
1450         kvfree(src_blkaddr);
1451         kvfree(do_replace);
1452         return ret;
1453 }
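
/*
 * Worked example of the chunking above (a sketch, assuming the default
 * 4 KiB block size, where ADDRS_PER_BLOCK() is 1018 for a regular,
 * non-compressed file):
 *
 *	olen = min(4 * 1018, len) = at most 4072 blocks per pass,
 *
 * i.e. roughly 16 MiB of file data handled per iteration, with the two
 * scratch arrays (block_t and int) staying around 16 KiB each.
 */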
1454 
1455 static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
1456 {
1457         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1458         pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
1459         pgoff_t start = offset >> PAGE_SHIFT;
1460         pgoff_t end = (offset + len) >> PAGE_SHIFT;
1461         int ret;
1462 
1463         f2fs_balance_fs(sbi, true);
1464 
1465         /* avoid GC operations during block exchange */
1466         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1467         filemap_invalidate_lock(inode->i_mapping);
1468 
1469         f2fs_lock_op(sbi);
1470         f2fs_drop_extent_tree(inode);
1471         truncate_pagecache(inode, offset);
1472         ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
1473         f2fs_unlock_op(sbi);
1474 
1475         filemap_invalidate_unlock(inode->i_mapping);
1476         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1477         return ret;
1478 }
1479 
1480 static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
1481 {
1482         loff_t new_size;
1483         int ret;
1484 
1485         if (offset + len >= i_size_read(inode))
1486                 return -EINVAL;
1487 
1488         /* collapse range should be aligned to the f2fs block size. */
1489         if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
1490                 return -EINVAL;
1491 
1492         ret = f2fs_convert_inline_inode(inode);
1493         if (ret)
1494                 return ret;
1495 
1496         /* write out all dirty pages from offset */
1497         ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
1498         if (ret)
1499                 return ret;
1500 
1501         ret = f2fs_do_collapse(inode, offset, len);
1502         if (ret)
1503                 return ret;
1504 
1505         /* write out all moved pages, if possible */
1506         filemap_invalidate_lock(inode->i_mapping);
1507         filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
1508         truncate_pagecache(inode, offset);
1509 
1510         new_size = i_size_read(inode) - len;
1511         ret = f2fs_truncate_blocks(inode, new_size, true);
1512         filemap_invalidate_unlock(inode->i_mapping);
1513         if (!ret)
1514                 f2fs_i_size_write(inode, new_size);
1515         return ret;
1516 }
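
/*
 * Usage sketch (userspace, not part of this file): f2fs_collapse_range()
 * backs FALLOC_FL_COLLAPSE_RANGE. Both offset and len must be aligned to
 * the f2fs block size (4 KiB by default) and offset + len must stay below
 * i_size, otherwise the call fails with EINVAL:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *
 *	// drop 'len' bytes at 'off' and shift the file tail down
 *	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, off, len) < 0)
 *		perror("collapse");
 */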
1517 
1518 static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
1519                                                                 pgoff_t end)
1520 {
1521         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1522         pgoff_t index = start;
1523         unsigned int ofs_in_node = dn->ofs_in_node;
1524         blkcnt_t count = 0;
1525         int ret;
1526 
1527         for (; index < end; index++, dn->ofs_in_node++) {
1528                 if (f2fs_data_blkaddr(dn) == NULL_ADDR)
1529                         count++;
1530         }
1531 
1532         dn->ofs_in_node = ofs_in_node;
1533         ret = f2fs_reserve_new_blocks(dn, count);
1534         if (ret)
1535                 return ret;
1536 
1537         dn->ofs_in_node = ofs_in_node;
1538         for (index = start; index < end; index++, dn->ofs_in_node++) {
1539                 dn->data_blkaddr = f2fs_data_blkaddr(dn);
1540                 /*
1541                  * f2fs_reserve_new_blocks() does not guarantee that every
1542                  * requested block was allocated.
1543                  */
1544                 if (dn->data_blkaddr == NULL_ADDR) {
1545                         ret = -ENOSPC;
1546                         break;
1547                 }
1548 
1549                 if (dn->data_blkaddr == NEW_ADDR)
1550                         continue;
1551 
1552                 if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
1553                                         DATA_GENERIC_ENHANCE)) {
1554                         ret = -EFSCORRUPTED;
1555                         break;
1556                 }
1557 
1558                 f2fs_invalidate_blocks(sbi, dn->data_blkaddr);
1559                 f2fs_set_data_blkaddr(dn, NEW_ADDR);
1560         }
1561 
1562         f2fs_update_read_extent_cache_range(dn, start, 0, index - start);
1563         f2fs_update_age_extent_cache_range(dn, start, index - start);
1564 
1565         return ret;
1566 }
1567 
1568 static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
1569                                                                 int mode)
1570 {
1571         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1572         struct address_space *mapping = inode->i_mapping;
1573         pgoff_t index, pg_start, pg_end;
1574         loff_t new_size = i_size_read(inode);
1575         loff_t off_start, off_end;
1576         int ret = 0;
1577 
1578         ret = inode_newsize_ok(inode, (len + offset));
1579         if (ret)
1580                 return ret;
1581 
1582         ret = f2fs_convert_inline_inode(inode);
1583         if (ret)
1584                 return ret;
1585 
1586         ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
1587         if (ret)
1588                 return ret;
1589 
1590         pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
1591         pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
1592 
1593         off_start = offset & (PAGE_SIZE - 1);
1594         off_end = (offset + len) & (PAGE_SIZE - 1);
1595 
1596         if (pg_start == pg_end) {
1597                 ret = fill_zero(inode, pg_start, off_start,
1598                                                 off_end - off_start);
1599                 if (ret)
1600                         return ret;
1601 
1602                 new_size = max_t(loff_t, new_size, offset + len);
1603         } else {
1604                 if (off_start) {
1605                         ret = fill_zero(inode, pg_start++, off_start,
1606                                                 PAGE_SIZE - off_start);
1607                         if (ret)
1608                                 return ret;
1609 
1610                         new_size = max_t(loff_t, new_size,
1611                                         (loff_t)pg_start << PAGE_SHIFT);
1612                 }
1613 
1614                 for (index = pg_start; index < pg_end;) {
1615                         struct dnode_of_data dn;
1616                         unsigned int end_offset;
1617                         pgoff_t end;
1618 
1619                         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1620                         filemap_invalidate_lock(mapping);
1621 
1622                         truncate_pagecache_range(inode,
1623                                 (loff_t)index << PAGE_SHIFT,
1624                                 ((loff_t)pg_end << PAGE_SHIFT) - 1);
1625 
1626                         f2fs_lock_op(sbi);
1627 
1628                         set_new_dnode(&dn, inode, NULL, NULL, 0);
1629                         ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
1630                         if (ret) {
1631                                 f2fs_unlock_op(sbi);
1632                                 filemap_invalidate_unlock(mapping);
1633                                 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1634                                 goto out;
1635                         }
1636 
1637                         end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1638                         end = min(pg_end, end_offset - dn.ofs_in_node + index);
1639 
1640                         ret = f2fs_do_zero_range(&dn, index, end);
1641                         f2fs_put_dnode(&dn);
1642 
1643                         f2fs_unlock_op(sbi);
1644                         filemap_invalidate_unlock(mapping);
1645                         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1646 
1647                         f2fs_balance_fs(sbi, dn.node_changed);
1648 
1649                         if (ret)
1650                                 goto out;
1651 
1652                         index = end;
1653                         new_size = max_t(loff_t, new_size,
1654                                         (loff_t)index << PAGE_SHIFT);
1655                 }
1656 
1657                 if (off_end) {
1658                         ret = fill_zero(inode, pg_end, 0, off_end);
1659                         if (ret)
1660                                 goto out;
1661 
1662                         new_size = max_t(loff_t, new_size, offset + len);
1663                 }
1664         }
1665 
1666 out:
1667         if (new_size > i_size_read(inode)) {
1668                 if (mode & FALLOC_FL_KEEP_SIZE)
1669                         file_set_keep_isize(inode);
1670                 else
1671                         f2fs_i_size_write(inode, new_size);
1672         }
1673         return ret;
1674 }
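
/*
 * Usage sketch (userspace, not part of this file): f2fs_zero_range()
 * backs FALLOC_FL_ZERO_RANGE. Unlike collapse/insert, the byte range need
 * not be block aligned; partial head/tail pages are zeroed via fill_zero()
 * and whole pages are switched to NEW_ADDR. With FALLOC_FL_KEEP_SIZE the
 * visible i_size is left untouched:
 *
 *	// zero 'len' bytes at 'off' without growing the file size
 *	fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, off, len);
 */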
1675 
1676 static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
1677 {
1678         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1679         struct address_space *mapping = inode->i_mapping;
1680         pgoff_t nr, pg_start, pg_end, delta, idx;
1681         loff_t new_size;
1682         int ret = 0;
1683 
1684         new_size = i_size_read(inode) + len;
1685         ret = inode_newsize_ok(inode, new_size);
1686         if (ret)
1687                 return ret;
1688 
1689         if (offset >= i_size_read(inode))
1690                 return -EINVAL;
1691 
1692         /* insert range should be aligned to the f2fs block size. */
1693         if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
1694                 return -EINVAL;
1695 
1696         ret = f2fs_convert_inline_inode(inode);
1697         if (ret)
1698                 return ret;
1699 
1700         f2fs_balance_fs(sbi, true);
1701 
1702         filemap_invalidate_lock(mapping);
1703         ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
1704         filemap_invalidate_unlock(mapping);
1705         if (ret)
1706                 return ret;
1707 
1708         /* write out all dirty pages from offset */
1709         ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
1710         if (ret)
1711                 return ret;
1712 
1713         pg_start = offset >> PAGE_SHIFT;
1714         pg_end = (offset + len) >> PAGE_SHIFT;
1715         delta = pg_end - pg_start;
1716         idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
1717 
1718         /* avoid GC operations during block exchange */
1719         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1720         filemap_invalidate_lock(mapping);
1721         truncate_pagecache(inode, offset);
1722 
1723         while (!ret && idx > pg_start) {
1724                 nr = idx - pg_start;
1725                 if (nr > delta)
1726                         nr = delta;
1727                 idx -= nr;
1728 
1729                 f2fs_lock_op(sbi);
1730                 f2fs_drop_extent_tree(inode);
1731 
1732                 ret = __exchange_data_block(inode, inode, idx,
1733                                         idx + delta, nr, false);
1734                 f2fs_unlock_op(sbi);
1735         }
1736         filemap_invalidate_unlock(mapping);
1737         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1738         if (ret)
1739                 return ret;
1740 
1741         /* write out all moved pages, if possible */
1742         filemap_invalidate_lock(mapping);
1743         ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
1744         truncate_pagecache(inode, offset);
1745         filemap_invalidate_unlock(mapping);
1746 
1747         if (!ret)
1748                 f2fs_i_size_write(inode, new_size);
1749         return ret;
1750 }
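
/*
 * Usage sketch (userspace, not part of this file): f2fs_insert_range()
 * backs FALLOC_FL_INSERT_RANGE. offset must lie below i_size, offset and
 * len must be block aligned, and everything from offset onward is shifted
 * up by len bytes while the new range reads back as zeroes:
 *
 *	if (fallocate(fd, FALLOC_FL_INSERT_RANGE, off, len) < 0)
 *		perror("insert");
 */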
1751 
1752 static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
1753                                         loff_t len, int mode)
1754 {
1755         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1756         struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
1757                         .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
1758                         .m_may_create = true };
1759         struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
1760                         .init_gc_type = FG_GC,
1761                         .should_migrate_blocks = false,
1762                         .err_gc_skipped = true,
1763                         .nr_free_secs = 0 };
1764         pgoff_t pg_start, pg_end;
1765         loff_t new_size;
1766         loff_t off_end;
1767         block_t expanded = 0;
1768         int err;
1769 
1770         err = inode_newsize_ok(inode, (len + offset));
1771         if (err)
1772                 return err;
1773 
1774         err = f2fs_convert_inline_inode(inode);
1775         if (err)
1776                 return err;
1777 
1778         f2fs_balance_fs(sbi, true);
1779 
1780         pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
1781         pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
1782         off_end = (offset + len) & (PAGE_SIZE - 1);
1783 
1784         map.m_lblk = pg_start;
1785         map.m_len = pg_end - pg_start;
1786         if (off_end)
1787                 map.m_len++;
1788 
1789         if (!map.m_len)
1790                 return 0;
1791 
1792         if (f2fs_is_pinned_file(inode)) {
1793                 block_t sec_blks = CAP_BLKS_PER_SEC(sbi);
1794                 block_t sec_len = roundup(map.m_len, sec_blks);
1795 
1796                 map.m_len = sec_blks;
1797 next_alloc:
1798                 if (has_not_enough_free_secs(sbi, 0,
1799                         GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
1800                         f2fs_down_write(&sbi->gc_lock);
1801                         stat_inc_gc_call_count(sbi, FOREGROUND);
1802                         err = f2fs_gc(sbi, &gc_control);
1803                         if (err && err != -ENODATA)
1804                                 goto out_err;
1805                 }
1806 
1807                 f2fs_down_write(&sbi->pin_sem);
1808 
1809                 err = f2fs_allocate_pinning_section(sbi);
1810                 if (err) {
1811                         f2fs_up_write(&sbi->pin_sem);
1812                         goto out_err;
1813                 }
1814 
1815                 map.m_seg_type = CURSEG_COLD_DATA_PINNED;
1816                 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
1817                 file_dont_truncate(inode);
1818 
1819                 f2fs_up_write(&sbi->pin_sem);
1820 
1821                 expanded += map.m_len;
1822                 sec_len -= map.m_len;
1823                 map.m_lblk += map.m_len;
1824                 if (!err && sec_len)
1825                         goto next_alloc;
1826 
1827                 map.m_len = expanded;
1828         } else {
1829                 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_AIO);
1830                 expanded = map.m_len;
1831         }
1832 out_err:
1833         if (err) {
1834                 pgoff_t last_off;
1835 
1836                 if (!expanded)
1837                         return err;
1838 
1839                 last_off = pg_start + expanded - 1;
1840 
1841                 /* update new size to the failed position */
1842                 new_size = (last_off == pg_end) ? offset + len :
1843                                         (loff_t)(last_off + 1) << PAGE_SHIFT;
1844         } else {
1845                 new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
1846         }
1847 
1848         if (new_size > i_size_read(inode)) {
1849                 if (mode & FALLOC_FL_KEEP_SIZE)
1850                         file_set_keep_isize(inode);
1851                 else
1852                         f2fs_i_size_write(inode, new_size);
1853         }
1854 
1855         return err;
1856 }
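
/*
 * Usage sketch (userspace, not part of this file): plain preallocation
 * (mode 0, optionally with FALLOC_FL_KEEP_SIZE) ends up here in
 * f2fs_expand_inode_data(). For pinned files the blocks are carved out
 * section by section from CURSEG_COLD_DATA_PINNED, running foreground GC
 * first if free sections are short:
 *
 *	// reserve 1 GiB on disk without changing the visible file size
 *	fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, (off_t)1 << 30);
 */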
1857 
1858 static long f2fs_fallocate(struct file *file, int mode,
1859                                 loff_t offset, loff_t len)
1860 {
1861         struct inode *inode = file_inode(file);
1862         long ret = 0;
1863 
1864         if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
1865                 return -EIO;
1866         if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
1867                 return -ENOSPC;
1868         if (!f2fs_is_compress_backend_ready(inode))
1869                 return -EOPNOTSUPP;
1870 
1871         /* f2fs only supports ->fallocate for regular files */
1872         if (!S_ISREG(inode->i_mode))
1873                 return -EINVAL;
1874 
1875         if (IS_ENCRYPTED(inode) &&
1876                 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
1877                 return -EOPNOTSUPP;
1878 
1879         if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
1880                         FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
1881                         FALLOC_FL_INSERT_RANGE))
1882                 return -EOPNOTSUPP;
1883 
1884         inode_lock(inode);
1885 
1886         /*
1887          * Pinned files should not support partial truncation since the
1888          * blocks can be in use by applications.
1889          */
1890         if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
1891                 (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
1892                         FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) {
1893                 ret = -EOPNOTSUPP;
1894                 goto out;
1895         }
1896 
1897         ret = file_modified(file);
1898         if (ret)
1899                 goto out;
1900 
1901         /*
1902          * wait for in-flight DIO; blocks should only be removed after
1903          * I/O completion.
1904          */
1905         inode_dio_wait(inode);
1906 
1907         if (mode & FALLOC_FL_PUNCH_HOLE) {
1908                 if (offset >= inode->i_size)
1909                         goto out;
1910 
1911                 ret = f2fs_punch_hole(inode, offset, len);
1912         } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
1913                 ret = f2fs_collapse_range(inode, offset, len);
1914         } else if (mode & FALLOC_FL_ZERO_RANGE) {
1915                 ret = f2fs_zero_range(inode, offset, len, mode);
1916         } else if (mode & FALLOC_FL_INSERT_RANGE) {
1917                 ret = f2fs_insert_range(inode, offset, len);
1918         } else {
1919                 ret = f2fs_expand_inode_data(inode, offset, len, mode);
1920         }
1921 
1922         if (!ret) {
1923                 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
1924                 f2fs_mark_inode_dirty_sync(inode, false);
1925                 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1926         }
1927 
1928 out:
1929         inode_unlock(inode);
1930 
1931         trace_f2fs_fallocate(inode, mode, offset, len, ret);
1932         return ret;
1933 }
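
/*
 * A note on the mode checks above, with a userspace sketch (not part of
 * this file; fall_back_to_zeroing() is a hypothetical helper): encrypted
 * files reject collapse/insert with EOPNOTSUPP, and compressed or pinned
 * files additionally reject punch/collapse/zero/insert, so callers should
 * be ready to fall back:
 *
 *	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *		      off, len) < 0 && errno == EOPNOTSUPP)
 *		fall_back_to_zeroing(fd, off, len);
 */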
1934 
1935 static int f2fs_release_file(struct inode *inode, struct file *filp)
1936 {
1937         /*
1938          * f2fs_release_file() is called on every close. So we should not
1939          * drop any in-memory pages on a close issued by another process.
1940          */
1941         if (!(filp->f_mode & FMODE_WRITE) ||
1942                         atomic_read(&inode->i_writecount) != 1)
1943                 return 0;
1944 
1945         inode_lock(inode);
1946         f2fs_abort_atomic_write(inode, true);
1947         inode_unlock(inode);
1948 
1949         return 0;
1950 }
1951 
1952 static int f2fs_file_flush(struct file *file, fl_owner_t id)
1953 {
1954         struct inode *inode = file_inode(file);
1955 
1956         /*
1957          * If the process doing a transaction crashes, we should roll back.
1958          * Otherwise, other readers/writers can see a corrupted database
1959          * until all the writers close their files. Since this must happen
1960          * before the file lock is dropped, it needs to be done in ->flush.
1961          */
1962         if (F2FS_I(inode)->atomic_write_task == current &&
1963                                 (current->flags & PF_EXITING)) {
1964                 inode_lock(inode);
1965                 f2fs_abort_atomic_write(inode, true);
1966                 inode_unlock(inode);
1967         }
1968 
1969         return 0;
1970 }
1971 
1972 static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
1973 {
1974         struct f2fs_inode_info *fi = F2FS_I(inode);
1975         u32 masked_flags = fi->i_flags & mask;
1976 
1977         /* mask can be shrunk by flags_valid selector */
1978         iflags &= mask;
1979 
1980         /* Is it a quota file? Do not allow the user to mess with it */
1981         if (IS_NOQUOTA(inode))
1982                 return -EPERM;
1983 
1984         if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) {
1985                 if (!f2fs_sb_has_casefold(F2FS_I_SB(inode)))
1986                         return -EOPNOTSUPP;
1987                 if (!f2fs_empty_dir(inode))
1988                         return -ENOTEMPTY;
1989         }
1990 
1991         if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) {
1992                 if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
1993                         return -EOPNOTSUPP;
1994                 if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL))
1995                         return -EINVAL;
1996         }
1997 
1998         if ((iflags ^ masked_flags) & F2FS_COMPR_FL) {
1999                 if (masked_flags & F2FS_COMPR_FL) {
2000                         if (!f2fs_disable_compressed_file(inode))
2001                                 return -EINVAL;
2002                 } else {
2003                         /* try to convert inline_data to support compression */
2004                         int err = f2fs_convert_inline_inode(inode);
2005                         if (err)
2006                                 return err;
2007 
2008                         f2fs_down_write(&fi->i_sem);
2009                         if (!f2fs_may_compress(inode) ||
2010                                         (S_ISREG(inode->i_mode) &&
2011                                         F2FS_HAS_BLOCKS(inode))) {
2012                                 f2fs_up_write(&fi->i_sem);
2013                                 return -EINVAL;
2014                         }
2015                         err = set_compress_context(inode);
2016                         f2fs_up_write(&fi->i_sem);
2017 
2018                         if (err)
2019                                 return err;
2020                 }
2021         }
2022 
2023         fi->i_flags = iflags | (fi->i_flags & ~mask);
2024         f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) &&
2025                                         (fi->i_flags & F2FS_NOCOMP_FL));
2026 
2027         if (fi->i_flags & F2FS_PROJINHERIT_FL)
2028                 set_inode_flag(inode, FI_PROJ_INHERIT);
2029         else
2030                 clear_inode_flag(inode, FI_PROJ_INHERIT);
2031 
2032         inode_set_ctime_current(inode);
2033         f2fs_set_inode_flags(inode);
2034         f2fs_mark_inode_dirty_sync(inode, true);
2035         return 0;
2036 }
2037 
2038 /* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */
2039 
2040 /*
2041  * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry
2042  * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to
2043  * F2FS_GETTABLE_FS_FL.  To make it settable via FS_IOC_SETFLAGS as well, also
2044  * add its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL.
2045  *
2046  * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and
2047  * FS_IOC_FSSETXATTR is done by the VFS.
2048  */
2049 
2050 static const struct {
2051         u32 iflag;
2052         u32 fsflag;
2053 } f2fs_fsflags_map[] = {
2054         { F2FS_COMPR_FL,        FS_COMPR_FL },
2055         { F2FS_SYNC_FL,         FS_SYNC_FL },
2056         { F2FS_IMMUTABLE_FL,    FS_IMMUTABLE_FL },
2057         { F2FS_APPEND_FL,       FS_APPEND_FL },
2058         { F2FS_NODUMP_FL,       FS_NODUMP_FL },
2059         { F2FS_NOATIME_FL,      FS_NOATIME_FL },
2060         { F2FS_NOCOMP_FL,       FS_NOCOMP_FL },
2061         { F2FS_INDEX_FL,        FS_INDEX_FL },
2062         { F2FS_DIRSYNC_FL,      FS_DIRSYNC_FL },
2063         { F2FS_PROJINHERIT_FL,  FS_PROJINHERIT_FL },
2064         { F2FS_CASEFOLD_FL,     FS_CASEFOLD_FL },
2065 };
2066 
2067 #define F2FS_GETTABLE_FS_FL (           \
2068                 FS_COMPR_FL |           \
2069                 FS_SYNC_FL |            \
2070                 FS_IMMUTABLE_FL |       \
2071                 FS_APPEND_FL |          \
2072                 FS_NODUMP_FL |          \
2073                 FS_NOATIME_FL |         \
2074                 FS_NOCOMP_FL |          \
2075                 FS_INDEX_FL |           \
2076                 FS_DIRSYNC_FL |         \
2077                 FS_PROJINHERIT_FL |     \
2078                 FS_ENCRYPT_FL |         \
2079                 FS_INLINE_DATA_FL |     \
2080                 FS_NOCOW_FL |           \
2081                 FS_VERITY_FL |          \
2082                 FS_CASEFOLD_FL)
2083 
2084 #define F2FS_SETTABLE_FS_FL (           \
2085                 FS_COMPR_FL |           \
2086                 FS_SYNC_FL |            \
2087                 FS_IMMUTABLE_FL |       \
2088                 FS_APPEND_FL |          \
2089                 FS_NODUMP_FL |          \
2090                 FS_NOATIME_FL |         \
2091                 FS_NOCOMP_FL |          \
2092                 FS_DIRSYNC_FL |         \
2093                 FS_PROJINHERIT_FL |     \
2094                 FS_CASEFOLD_FL)
2095 
2096 /* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
2097 static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
2098 {
2099         u32 fsflags = 0;
2100         int i;
2101 
2102         for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
2103                 if (iflags & f2fs_fsflags_map[i].iflag)
2104                         fsflags |= f2fs_fsflags_map[i].fsflag;
2105 
2106         return fsflags;
2107 }
2108 
2109 /* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */
2110 static inline u32 f2fs_fsflags_to_iflags(u32 fsflags)
2111 {
2112         u32 iflags = 0;
2113         int i;
2114 
2115         for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
2116                 if (fsflags & f2fs_fsflags_map[i].fsflag)
2117                         iflags |= f2fs_fsflags_map[i].iflag;
2118 
2119         return iflags;
2120 }
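
/*
 * Usage sketch (userspace, not part of this file): these maps back the
 * generic attribute ioctls from <linux/fs.h>, e.g. enabling compression
 * on a still-empty file:
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/fs.h>
 *
 *	int attr;
 *	ioctl(fd, FS_IOC_GETFLAGS, &attr);
 *	attr |= FS_COMPR_FL;		// maps to F2FS_COMPR_FL
 *	ioctl(fd, FS_IOC_SETFLAGS, &attr);
 *
 * f2fs_setflags_common() rejects FS_COMPR_FL once a regular file already
 * has blocks, and flipping FS_CASEFOLD_FL on a non-empty directory
 * returns ENOTEMPTY.
 */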
2121 
2122 static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
2123 {
2124         struct inode *inode = file_inode(filp);
2125 
2126         return put_user(inode->i_generation, (int __user *)arg);
2127 }
2128 
2129 static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
2130 {
2131         struct inode *inode = file_inode(filp);
2132         struct mnt_idmap *idmap = file_mnt_idmap(filp);
2133         struct f2fs_inode_info *fi = F2FS_I(inode);
2134         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2135         loff_t isize;
2136         int ret;
2137 
2138         if (!(filp->f_mode & FMODE_WRITE))
2139                 return -EBADF;
2140 
2141         if (!inode_owner_or_capable(idmap, inode))
2142                 return -EACCES;
2143 
2144         if (!S_ISREG(inode->i_mode))
2145                 return -EINVAL;
2146 
2147         if (filp->f_flags & O_DIRECT)
2148                 return -EINVAL;
2149 
2150         ret = mnt_want_write_file(filp);
2151         if (ret)
2152                 return ret;
2153 
2154         inode_lock(inode);
2155 
2156         if (!f2fs_disable_compressed_file(inode) ||
2157                         f2fs_is_pinned_file(inode)) {
2158                 ret = -EINVAL;
2159                 goto out;
2160         }
2161 
2162         if (f2fs_is_atomic_file(inode))
2163                 goto out;
2164 
2165         ret = f2fs_convert_inline_inode(inode);
2166         if (ret)
2167                 goto out;
2168 
2169         f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
2170 
2171         /*
2172          * Should wait for end_io so that F2FS_WB_CP_DATA is counted
2173          * correctly by f2fs_is_atomic_file().
2174          */
2175         if (get_dirty_pages(inode))
2176                 f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u",
2177                           inode->i_ino, get_dirty_pages(inode));
2178         ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
2179         if (ret) {
2180                 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
2181                 goto out;
2182         }
2183 
2184         /* Check if the inode already has a COW inode */
2185         if (fi->cow_inode == NULL) {
2186                 /* Create a COW inode for atomic write */
2187                 struct dentry *dentry = file_dentry(filp);
2188                 struct inode *dir = d_inode(dentry->d_parent);
2189 
2190                 ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode);
2191                 if (ret) {
2192                         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
2193                         goto out;
2194                 }
2195 
2196                 set_inode_flag(fi->cow_inode, FI_COW_FILE);
2197                 clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
2198 
2199                 /* Set the COW inode's atomic_inode to the atomic inode */
2200                 F2FS_I(fi->cow_inode)->atomic_inode = inode;
2201         } else {
2202                 /* Reuse the already created COW inode */
2203                 f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode));
2204 
2205                 invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1);
2206 
2207                 ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
2208                 if (ret) {
2209                         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
2210                         goto out;
2211                 }
2212         }
2213 
2214         f2fs_write_inode(inode, NULL);
2215 
2216         stat_inc_atomic_inode(inode);
2217 
2218         set_inode_flag(inode, FI_ATOMIC_FILE);
2219 
2220         isize = i_size_read(inode);
2221         fi->original_i_size = isize;
2222         if (truncate) {
2223                 set_inode_flag(inode, FI_ATOMIC_REPLACE);
2224                 truncate_inode_pages_final(inode->i_mapping);
2225                 f2fs_i_size_write(inode, 0);
2226                 isize = 0;
2227         }
2228         f2fs_i_size_write(fi->cow_inode, isize);
2229 
2230         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
2231 
2232         f2fs_update_time(sbi, REQ_TIME);
2233         fi->atomic_write_task = current;
2234         stat_update_max_atomic_write(inode);
2235         fi->atomic_write_cnt = 0;
2236 out:
2237         inode_unlock(inode);
2238         mnt_drop_write_file(filp);
2239         return ret;
2240 }
2241 
2242 static int f2fs_ioc_commit_atomic_write(struct file *filp)
2243 {
2244         struct inode *inode = file_inode(filp);
2245         struct mnt_idmap *idmap = file_mnt_idmap(filp);
2246         int ret;
2247 
2248         if (!(filp->f_mode & FMODE_WRITE))
2249                 return -EBADF;
2250 
2251         if (!inode_owner_or_capable(idmap, inode))
2252                 return -EACCES;
2253 
2254         ret = mnt_want_write_file(filp);
2255         if (ret)
2256                 return ret;
2257 
2258         f2fs_balance_fs(F2FS_I_SB(inode), true);
2259 
2260         inode_lock(inode);
2261 
2262         if (f2fs_is_atomic_file(inode)) {
2263                 ret = f2fs_commit_atomic_write(inode);
2264                 if (!ret)
2265                         ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
2266 
2267                 f2fs_abort_atomic_write(inode, ret);
2268         } else {
2269                 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
2270         }
2271 
2272         inode_unlock(inode);
2273         mnt_drop_write_file(filp);
2274         return ret;
2275 }
2276 
2277 static int f2fs_ioc_abort_atomic_write(struct file *filp)
2278 {
2279         struct inode *inode = file_inode(filp);
2280         struct mnt_idmap *idmap = file_mnt_idmap(filp);
2281         int ret;
2282 
2283         if (!(filp->f_mode & FMODE_WRITE))
2284                 return -EBADF;
2285 
2286         if (!inode_owner_or_capable(idmap, inode))
2287                 return -EACCES;
2288 
2289         ret = mnt_want_write_file(filp);
2290         if (ret)
2291                 return ret;
2292 
2293         inode_lock(inode);
2294 
2295         f2fs_abort_atomic_write(inode, true);
2296 
2297         inode_unlock(inode);
2298 
2299         mnt_drop_write_file(filp);
2300         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2301         return ret;
2302 }
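
/*
 * Usage sketch (userspace, not part of this file) for the three atomic
 * write ioctls above, in the style of a database updater: every write
 * issued between start and commit becomes visible atomically, and abort
 * throws the staged data away. (F2FS_IOC_START_ATOMIC_REPLACE is the
 * truncate == true flavour of the start ioctl.)
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE);
 *	if (pwrite(fd, buf, len, off) != (ssize_t)len)
 *		ioctl(fd, F2FS_IOC_ABORT_ATOMIC_WRITE);
 *	else
 *		ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);
 */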
2303 
2304 int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag,
2305                                                 bool readonly, bool need_lock)
2306 {
2307         struct super_block *sb = sbi->sb;
2308         int ret = 0;
2309 
2310         switch (flag) {
2311         case F2FS_GOING_DOWN_FULLSYNC:
2312                 ret = bdev_freeze(sb->s_bdev);
2313                 if (ret)
2314                         goto out;
2315                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
2316                 bdev_thaw(sb->s_bdev);
2317                 break;
2318         case F2FS_GOING_DOWN_METASYNC:
2319                 /* do checkpoint only */
2320                 ret = f2fs_sync_fs(sb, 1);
2321                 if (ret) {
2322                         if (ret == -EIO)
2323                                 ret = 0;
2324                         goto out;
2325                 }
2326                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
2327                 break;
2328         case F2FS_GOING_DOWN_NOSYNC:
2329                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
2330                 break;
2331         case F2FS_GOING_DOWN_METAFLUSH:
2332                 f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
2333                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
2334                 break;
2335         case F2FS_GOING_DOWN_NEED_FSCK:
2336                 set_sbi_flag(sbi, SBI_NEED_FSCK);
2337                 set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
2338                 set_sbi_flag(sbi, SBI_IS_DIRTY);
2339                 /* do checkpoint only */
2340                 ret = f2fs_sync_fs(sb, 1);
2341                 if (ret == -EIO)
2342                         ret = 0;
2343                 goto out;
2344         default:
2345                 ret = -EINVAL;
2346                 goto out;
2347         }
2348 
2349         if (readonly)
2350                 goto out;
2351 
2352         /* grab sb->s_umount to avoid racing w/ remount() */
2353         if (need_lock)
2354                 down_read(&sbi->sb->s_umount);
2355 
2356         f2fs_stop_gc_thread(sbi);
2357         f2fs_stop_discard_thread(sbi);
2358 
2359         f2fs_drop_discard_cmd(sbi);
2360         clear_opt(sbi, DISCARD);
2361 
2362         if (need_lock)
2363                 up_read(&sbi->sb->s_umount);
2364 
2365         f2fs_update_time(sbi, REQ_TIME);
2366 out:
2367 
2368         trace_f2fs_shutdown(sbi, flag, ret);
2369 
2370         return ret;
2371 }
2372 
2373 static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
2374 {
2375         struct inode *inode = file_inode(filp);
2376         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2377         __u32 in;
2378         int ret;
2379         bool need_drop = false, readonly = false;
2380 
2381         if (!capable(CAP_SYS_ADMIN))
2382                 return -EPERM;
2383 
2384         if (get_user(in, (__u32 __user *)arg))
2385                 return -EFAULT;
2386 
2387         if (in != F2FS_GOING_DOWN_FULLSYNC) {
2388                 ret = mnt_want_write_file(filp);
2389                 if (ret) {
2390                         if (ret != -EROFS)
2391                                 return ret;
2392 
2393                         /* fall back to nosync shutdown for a read-only fs */
2394                         in = F2FS_GOING_DOWN_NOSYNC;
2395                         readonly = true;
2396                 } else {
2397                         need_drop = true;
2398                 }
2399         }
2400 
2401         ret = f2fs_do_shutdown(sbi, in, readonly, true);
2402 
2403         if (need_drop)
2404                 mnt_drop_write_file(filp);
2405 
2406         return ret;
2407 }
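
/*
 * Usage sketch (userspace, not part of this file): forcing a shutdown for
 * testing, analogous to XFS_IOC_GOINGDOWN. The __u32 flag picks how much
 * state is flushed first (see the switch in f2fs_do_shutdown()); requires
 * CAP_SYS_ADMIN:
 *
 *	__u32 flag = F2FS_GOING_DOWN_METASYNC;	// checkpoint, then stop
 *	if (ioctl(fd, F2FS_IOC_SHUTDOWN, &flag) < 0)
 *		perror("shutdown");
 */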
2408 
2409 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
2410 {
2411         struct inode *inode = file_inode(filp);
2412         struct super_block *sb = inode->i_sb;
2413         struct fstrim_range range;
2414         int ret;
2415 
2416         if (!capable(CAP_SYS_ADMIN))
2417                 return -EPERM;
2418 
2419         if (!f2fs_hw_support_discard(F2FS_SB(sb)))
2420                 return -EOPNOTSUPP;
2421 
2422         if (copy_from_user(&range, (struct fstrim_range __user *)arg,
2423                                 sizeof(range)))
2424                 return -EFAULT;
2425 
2426         ret = mnt_want_write_file(filp);
2427         if (ret)
2428                 return ret;
2429 
2430         range.minlen = max((unsigned int)range.minlen,
2431                            bdev_discard_granularity(sb->s_bdev));
2432         ret = f2fs_trim_fs(F2FS_SB(sb), &range);
2433         mnt_drop_write_file(filp);
2434         if (ret < 0)
2435                 return ret;
2436 
2437         if (copy_to_user((struct fstrim_range __user *)arg, &range,
2438                                 sizeof(range)))
2439                 return -EFAULT;
2440         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2441         return 0;
2442 }
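
/*
 * Usage sketch (userspace, not part of this file): FITRIM is the generic
 * VFS trim ioctl; minlen is raised to the device's discard granularity
 * and range.len is rewritten to the number of bytes trimmed:
 *
 *	#include <linux/fs.h>
 *
 *	struct fstrim_range range = {
 *		.start = 0,
 *		.len = (__u64)-1,	// whole filesystem
 *		.minlen = 0,
 *	};
 *	if (ioctl(fd, FITRIM, &range) == 0)
 *		printf("trimmed %llu bytes\n",
 *		       (unsigned long long)range.len);
 */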
2443 
2444 static bool uuid_is_nonzero(__u8 u[16])
2445 {
2446         int i;
2447 
2448         for (i = 0; i < 16; i++)
2449                 if (u[i])
2450                         return true;
2451         return false;
2452 }
2453 
2454 static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
2455 {
2456         struct inode *inode = file_inode(filp);
2457         int ret;
2458 
2459         if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
2460                 return -EOPNOTSUPP;
2461 
2462         ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
2463         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2464         return ret;
2465 }
2466 
2467 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
2468 {
2469         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2470                 return -EOPNOTSUPP;
2471         return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
2472 }
2473 
2474 static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
2475 {
2476         struct inode *inode = file_inode(filp);
2477         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2478         u8 encrypt_pw_salt[16];
2479         int err;
2480 
2481         if (!f2fs_sb_has_encrypt(sbi))
2482                 return -EOPNOTSUPP;
2483 
2484         err = mnt_want_write_file(filp);
2485         if (err)
2486                 return err;
2487 
2488         f2fs_down_write(&sbi->sb_lock);
2489 
2490         if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
2491                 goto got_it;
2492 
2493         /* update superblock with uuid */
2494         generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
2495 
2496         err = f2fs_commit_super(sbi, false);
2497         if (err) {
2498                 /* undo new data */
2499                 memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
2500                 goto out_err;
2501         }
2502 got_it:
2503         memcpy(encrypt_pw_salt, sbi->raw_super->encrypt_pw_salt, 16);
2504 out_err:
2505         f2fs_up_write(&sbi->sb_lock);
2506         mnt_drop_write_file(filp);
2507 
2508         if (!err && copy_to_user((__u8 __user *)arg, encrypt_pw_salt, 16))
2509                 err = -EFAULT;
2510 
2511         return err;
2512 }
2513 
2514 static int f2fs_ioc_get_encryption_policy_ex(struct file *filp,
2515                                              unsigned long arg)
2516 {
2517         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2518                 return -EOPNOTSUPP;
2519 
2520         return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg);
2521 }
2522 
2523 static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg)
2524 {
2525         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2526                 return -EOPNOTSUPP;
2527 
2528         return fscrypt_ioctl_add_key(filp, (void __user *)arg);
2529 }
2530 
2531 static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg)
2532 {
2533         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2534                 return -EOPNOTSUPP;
2535 
2536         return fscrypt_ioctl_remove_key(filp, (void __user *)arg);
2537 }
2538 
2539 static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp,
2540                                                     unsigned long arg)
2541 {
2542         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2543                 return -EOPNOTSUPP;
2544 
2545         return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg);
2546 }
2547 
2548 static int f2fs_ioc_get_encryption_key_status(struct file *filp,
2549                                               unsigned long arg)
2550 {
2551         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2552                 return -EOPNOTSUPP;
2553 
2554         return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);
2555 }
2556 
2557 static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg)
2558 {
2559         if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
2560                 return -EOPNOTSUPP;
2561 
2562         return fscrypt_ioctl_get_nonce(filp, (void __user *)arg);
2563 }
2564 
2565 static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
2566 {
2567         struct inode *inode = file_inode(filp);
2568         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2569         struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
2570                         .no_bg_gc = false,
2571                         .should_migrate_blocks = false,
2572                         .nr_free_secs = 0 };
2573         __u32 sync;
2574         int ret;
2575 
2576         if (!capable(CAP_SYS_ADMIN))
2577                 return -EPERM;
2578 
2579         if (get_user(sync, (__u32 __user *)arg))
2580                 return -EFAULT;
2581 
2582         if (f2fs_readonly(sbi->sb))
2583                 return -EROFS;
2584 
2585         ret = mnt_want_write_file(filp);
2586         if (ret)
2587                 return ret;
2588 
2589         if (!sync) {
2590                 if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
2591                         ret = -EBUSY;
2592                         goto out;
2593                 }
2594         } else {
2595                 f2fs_down_write(&sbi->gc_lock);
2596         }
2597 
2598         gc_control.init_gc_type = sync ? FG_GC : BG_GC;
2599         gc_control.err_gc_skipped = sync;
2600         stat_inc_gc_call_count(sbi, FOREGROUND);
2601         ret = f2fs_gc(sbi, &gc_control);
2602 out:
2603         mnt_drop_write_file(filp);
2604         return ret;
2605 }
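
/*
 * Usage sketch (userspace, not part of this file): kicking garbage
 * collection by hand. sync == 1 waits for gc_lock and runs foreground GC;
 * sync == 0 only trylocks and fails with EBUSY if GC is already running:
 *
 *	__u32 sync = 1;
 *	if (ioctl(fd, F2FS_IOC_GARBAGE_COLLECT, &sync) < 0)
 *		perror("gc");
 */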
2606 
2607 static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
2608 {
2609         struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
2610         struct f2fs_gc_control gc_control = {
2611                         .init_gc_type = range->sync ? FG_GC : BG_GC,
2612                         .no_bg_gc = false,
2613                         .should_migrate_blocks = false,
2614                         .err_gc_skipped = range->sync,
2615                         .nr_free_secs = 0 };
2616         u64 end;
2617         int ret;
2618 
2619         if (!capable(CAP_SYS_ADMIN))
2620                 return -EPERM;
2621         if (f2fs_readonly(sbi->sb))
2622                 return -EROFS;
2623 
2624         end = range->start + range->len;
2625         if (end < range->start || range->start < MAIN_BLKADDR(sbi) ||
2626                                         end >= MAX_BLKADDR(sbi))
2627                 return -EINVAL;
2628 
2629         ret = mnt_want_write_file(filp);
2630         if (ret)
2631                 return ret;
2632 
2633 do_more:
2634         if (!range->sync) {
2635                 if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
2636                         ret = -EBUSY;
2637                         goto out;
2638                 }
2639         } else {
2640                 f2fs_down_write(&sbi->gc_lock);
2641         }
2642 
2643         gc_control.victim_segno = GET_SEGNO(sbi, range->start);
2644         stat_inc_gc_call_count(sbi, FOREGROUND);
2645         ret = f2fs_gc(sbi, &gc_control);
2646         if (ret) {
2647                 if (ret == -EBUSY)
2648                         ret = -EAGAIN;
2649                 goto out;
2650         }
2651         range->start += CAP_BLKS_PER_SEC(sbi);
2652         if (range->start <= end)
2653                 goto do_more;
2654 out:
2655         mnt_drop_write_file(filp);
2656         return ret;
2657 }
2658 
2659 static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
2660 {
2661         struct f2fs_gc_range range;
2662 
2663         if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
2664                                                         sizeof(range)))
2665                 return -EFAULT;
2666         return __f2fs_ioc_gc_range(filp, &range);
2667 }
2668 
2669 static int f2fs_ioc_write_checkpoint(struct file *filp)
2670 {
2671         struct inode *inode = file_inode(filp);
2672         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2673         int ret;
2674 
2675         if (!capable(CAP_SYS_ADMIN))
2676                 return -EPERM;
2677 
2678         if (f2fs_readonly(sbi->sb))
2679                 return -EROFS;
2680 
2681         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2682                 f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled.");
2683                 return -EINVAL;
2684         }
2685 
2686         ret = mnt_want_write_file(filp);
2687         if (ret)
2688                 return ret;
2689 
2690         ret = f2fs_sync_fs(sbi->sb, 1);
2691 
2692         mnt_drop_write_file(filp);
2693         return ret;
2694 }
2695 
2696 static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
2697                                         struct file *filp,
2698                                         struct f2fs_defragment *range)
2699 {
2700         struct inode *inode = file_inode(filp);
2701         struct f2fs_map_blocks map = { .m_next_extent = NULL,
2702                                         .m_seg_type = NO_CHECK_TYPE,
2703                                         .m_may_create = false };
2704         struct extent_info ei = {};
2705         pgoff_t pg_start, pg_end, next_pgofs;
2706         unsigned int total = 0, sec_num;
2707         block_t blk_end = 0;
2708         bool fragmented = false;
2709         int err;
2710 
2711         f2fs_balance_fs(sbi, true);
2712 
2713         inode_lock(inode);
2714         pg_start = range->start >> PAGE_SHIFT;
2715         pg_end = min_t(pgoff_t,
2716                                 (range->start + range->len) >> PAGE_SHIFT,
2717                                 DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));
2718 
2719         if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) ||
2720                 f2fs_is_atomic_file(inode)) {
2721                 err = -EINVAL;
2722                 goto unlock_out;
2723         }
2724 
2725         /* if in-place-update policy is enabled, don't waste time here */
2726         set_inode_flag(inode, FI_OPU_WRITE);
2727         if (f2fs_should_update_inplace(inode, NULL)) {
2728                 err = -EINVAL;
2729                 goto out;
2730         }
2731 
2732         /* writeback all dirty pages in the range */
2733         err = filemap_write_and_wait_range(inode->i_mapping,
2734                                                 pg_start << PAGE_SHIFT,
2735                                                 (pg_end << PAGE_SHIFT) - 1);
2736         if (err)
2737                 goto out;
2738 
2739         /*
2740          * look up mapping info in the extent cache; skip defragmenting if
2741          * the physical block addresses are contiguous.
2742          */
2743         if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
2744                 if ((pgoff_t)ei.fofs + ei.len >= pg_end)
2745                         goto out;
2746         }
2747 
2748         map.m_lblk = pg_start;
2749         map.m_next_pgofs = &next_pgofs;
2750 
2751         /*
2752          * look up mapping info in the dnode page cache; skip defragmenting
2753          * if all physical block addresses are contiguous, even if there are
2754          * holes in the logical blocks.
2755          */
2756         while (map.m_lblk < pg_end) {
2757                 map.m_len = pg_end - map.m_lblk;
2758                 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
2759                 if (err)
2760                         goto out;
2761 
2762                 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
2763                         map.m_lblk = next_pgofs;
2764                         continue;
2765                 }
2766 
2767                 if (blk_end && blk_end != map.m_pblk)
2768                         fragmented = true;
2769 
2770                 /* record the total count of blocks that we're going to move */
2771                 total += map.m_len;
2772 
2773                 blk_end = map.m_pblk + map.m_len;
2774 
2775                 map.m_lblk += map.m_len;
2776         }
2777 
2778         if (!fragmented) {
2779                 total = 0;
2780                 goto out;
2781         }
2782 
2783         sec_num = DIV_ROUND_UP(total, CAP_BLKS_PER_SEC(sbi));
2784 
2785         /*
2786          * make sure there are enough free sections for LFS allocation; this
2787          * avoids defragmentation running in SSR mode when free sections are
2788          * being consumed intensively
2789          */
2790         if (has_not_enough_free_secs(sbi, 0, sec_num)) {
2791                 err = -EAGAIN;
2792                 goto out;
2793         }
2794 
2795         map.m_lblk = pg_start;
2796         map.m_len = pg_end - pg_start;
2797         total = 0;
2798 
2799         while (map.m_lblk < pg_end) {
2800                 pgoff_t idx;
2801                 int cnt = 0;
2802 
2803 do_map:
2804                 map.m_len = pg_end - map.m_lblk;
2805                 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
2806                 if (err)
2807                         goto clear_out;
2808 
2809                 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
2810                         map.m_lblk = next_pgofs;
2811                         goto check;
2812                 }
2813 
2814                 set_inode_flag(inode, FI_SKIP_WRITES);
2815 
2816                 idx = map.m_lblk;
2817                 while (idx < map.m_lblk + map.m_len &&
2818                                                 cnt < BLKS_PER_SEG(sbi)) {
2819                         struct page *page;
2820 
2821                         page = f2fs_get_lock_data_page(inode, idx, true);
2822                         if (IS_ERR(page)) {
2823                                 err = PTR_ERR(page);
2824                                 goto clear_out;
2825                         }
2826 
2827                         f2fs_wait_on_page_writeback(page, DATA, true, true);
2828 
2829                         set_page_dirty(page);
2830                         set_page_private_gcing(page);
2831                         f2fs_put_page(page, 1);
2832 
2833                         idx++;
2834                         cnt++;
2835                         total++;
2836                 }
2837 
2838                 map.m_lblk = idx;
2839 check:
2840                 if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi))
2841                         goto do_map;
2842 
2843                 clear_inode_flag(inode, FI_SKIP_WRITES);
2844 
2845                 err = filemap_fdatawrite(inode->i_mapping);
2846                 if (err)
2847                         goto out;
2848         }
2849 clear_out:
2850         clear_inode_flag(inode, FI_SKIP_WRITES);
2851 out:
2852         clear_inode_flag(inode, FI_OPU_WRITE);
2853 unlock_out:
2854         inode_unlock(inode);
2855         if (!err)
2856                 range->len = (u64)total << PAGE_SHIFT;
2857         return err;
2858 }
2859 
2860 static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
2861 {
2862         struct inode *inode = file_inode(filp);
2863         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2864         struct f2fs_defragment range;
2865         int err;
2866 
2867         if (!capable(CAP_SYS_ADMIN))
2868                 return -EPERM;
2869 
2870         if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode))
2871                 return -EINVAL;
2872 
2873         if (f2fs_readonly(sbi->sb))
2874                 return -EROFS;
2875 
2876         if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
2877                                                         sizeof(range)))
2878                 return -EFAULT;
2879 
2880         /* verify alignment of offset & size */
2881         if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1))
2882                 return -EINVAL;
2883 
2884         if (unlikely((range.start + range.len) >> PAGE_SHIFT >
2885                                         max_file_blocks(inode)))
2886                 return -EINVAL;
2887 
2888         err = mnt_want_write_file(filp);
2889         if (err)
2890                 return err;
2891 
2892         err = f2fs_defragment_range(sbi, filp, &range);
2893         mnt_drop_write_file(filp);
2894 
2895         if (range.len)
2896                 f2fs_update_time(sbi, REQ_TIME);
2897         if (err < 0)
2898                 return err;
2899 
2900         if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
2901                                                         sizeof(range)))
2902                 return -EFAULT;
2903 
2904         return 0;
2905 }
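
/*
 * Usage sketch (userspace, not part of this file): defragmenting a byte
 * range. start and len must be f2fs block aligned, and on success
 * range.len is rewritten to the number of bytes queued for relocation
 * (file_size below is a placeholder for the file's length):
 *
 *	#include <linux/f2fs.h>
 *
 *	struct f2fs_defragment range = { .start = 0, .len = file_size };
 *	if (ioctl(fd, F2FS_IOC_DEFRAGMENT, &range) == 0)
 *		printf("moved %llu bytes\n",
 *		       (unsigned long long)range.len);
 */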
2906 
2907 static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
2908                         struct file *file_out, loff_t pos_out, size_t len)
2909 {
2910         struct inode *src = file_inode(file_in);
2911         struct inode *dst = file_inode(file_out);
2912         struct f2fs_sb_info *sbi = F2FS_I_SB(src);
2913         size_t olen = len, dst_max_i_size = 0;
2914         size_t dst_osize;
2915         int ret;
2916 
2917         if (file_in->f_path.mnt != file_out->f_path.mnt ||
2918                                 src->i_sb != dst->i_sb)
2919                 return -EXDEV;
2920 
2921         if (unlikely(f2fs_readonly(src->i_sb)))
2922                 return -EROFS;
2923 
2924         if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
2925                 return -EINVAL;
2926 
2927         if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
2928                 return -EOPNOTSUPP;
2929 
2930         if (pos_out < 0 || pos_in < 0)
2931                 return -EINVAL;
2932 
2933         if (src == dst) {
2934                 if (pos_in == pos_out)
2935                         return 0;
2936                 if (pos_out > pos_in && pos_out < pos_in + len)
2937                         return -EINVAL;
2938         }
2939 
2940         inode_lock(src);
2941         if (src != dst) {
2942                 ret = -EBUSY;
2943                 if (!inode_trylock(dst))
2944                         goto out;
2945         }
2946 
2947         if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) ||
2948                 f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) {
2949                 ret = -EOPNOTSUPP;
2950                 goto out_unlock;
2951         }
2952 
2953         if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) {
2954                 ret = -EINVAL;
2955                 goto out_unlock;
2956         }
2957 
2958         ret = -EINVAL;
2959         if (pos_in + len > src->i_size || pos_in + len < pos_in)
2960                 goto out_unlock;
2961         if (len == 0)
2962                 olen = len = src->i_size - pos_in;
2963         if (pos_in + len == src->i_size)
2964                 len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
2965         if (len == 0) {
2966                 ret = 0;
2967                 goto out_unlock;
2968         }
2969 
2970         dst_osize = dst->i_size;
2971         if (pos_out + olen > dst->i_size)
2972                 dst_max_i_size = pos_out + olen;
2973 
2974         /* verify the end result is block aligned */
2975         if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) ||
2976                         !IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) ||
2977                         !IS_ALIGNED(pos_out, F2FS_BLKSIZE))
2978                 goto out_unlock;
2979 
2980         ret = f2fs_convert_inline_inode(src);
2981         if (ret)
2982                 goto out_unlock;
2983 
2984         ret = f2fs_convert_inline_inode(dst);
2985         if (ret)
2986                 goto out_unlock;
2987 
2988         /* write out all dirty pages from offset */
2989         ret = filemap_write_and_wait_range(src->i_mapping,
2990                                         pos_in, pos_in + len);
2991         if (ret)
2992                 goto out_unlock;
2993 
2994         ret = filemap_write_and_wait_range(dst->i_mapping,
2995                                         pos_out, pos_out + len);
2996         if (ret)
2997                 goto out_unlock;
2998 
2999         f2fs_balance_fs(sbi, true);
3000 
3001         f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
3002         if (src != dst) {
3003                 ret = -EBUSY;
3004                 if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
3005                         goto out_src;
3006         }
3007 
3008         f2fs_lock_op(sbi);
3009         ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
3010                                 pos_out >> F2FS_BLKSIZE_BITS,
3011                                 len >> F2FS_BLKSIZE_BITS, false);
3012 
3013         if (!ret) {
3014                 if (dst_max_i_size)
3015                         f2fs_i_size_write(dst, dst_max_i_size);
3016                 else if (dst_osize != dst->i_size)
3017                         f2fs_i_size_write(dst, dst_osize);
3018         }
3019         f2fs_unlock_op(sbi);
3020 
3021         if (src != dst)
3022                 f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
3023 out_src:
3024         f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
3025         if (ret)
3026                 goto out_unlock;
3027 
3028         inode_set_mtime_to_ts(src, inode_set_ctime_current(src));
3029         f2fs_mark_inode_dirty_sync(src, false);
3030         if (src != dst) {
3031                 inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst));
3032                 f2fs_mark_inode_dirty_sync(dst, false);
3033         }
3034         f2fs_update_time(sbi, REQ_TIME);
3035 
3036 out_unlock:
3037         if (src != dst)
3038                 inode_unlock(dst);
3039 out:
3040         inode_unlock(src);
3041         return ret;
3042 }
3043 
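     /*
      * Resolve the destination fd from the f2fs_move_range argument,
      * verify both files' access modes, and forward to
      * f2fs_move_file_range() with a write reference on the mount.
      */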
3044 static int __f2fs_ioc_move_range(struct file *filp,
3045                                 struct f2fs_move_range *range)
3046 {
3047         struct fd dst;
3048         int err;
3049 
3050         if (!(filp->f_mode & FMODE_READ) ||
3051                         !(filp->f_mode & FMODE_WRITE))
3052                 return -EBADF;
3053 
3054         dst = fdget(range->dst_fd);
3055         if (!dst.file)
3056                 return -EBADF;
3057 
3058         if (!(dst.file->f_mode & FMODE_WRITE)) {
3059                 err = -EBADF;
3060                 goto err_out;
3061         }
3062 
3063         err = mnt_want_write_file(filp);
3064         if (err)
3065                 goto err_out;
3066 
3067         err = f2fs_move_file_range(filp, range->pos_in, dst.file,
3068                                         range->pos_out, range->len);
3069 
3070         mnt_drop_write_file(filp);
3071 err_out:
3072         fdput(dst);
3073         return err;
3074 }
3075 
3076 static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
3077 {
3078         struct f2fs_move_range range;
3079 
3080         if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
3081                                                         sizeof(range)))
3082                 return -EFAULT;
3083         return __f2fs_ioc_move_range(filp, &range);
3084 }
3085 
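     /*
      * F2FS_IOC_FLUSH_DEVICE: migrate up to range.segments segments off
      * one device of a multi-device fs by running foreground GC on one
      * victim segment at a time; the GC victim hints are pushed past the
      * range on each pass.  Only layouts with one segment per section
      * are supported.
      */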
3086 static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
3087 {
3088         struct inode *inode = file_inode(filp);
3089         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3090         struct sit_info *sm = SIT_I(sbi);
3091         unsigned int start_segno = 0, end_segno = 0;
3092         unsigned int dev_start_segno = 0, dev_end_segno = 0;
3093         struct f2fs_flush_device range;
3094         struct f2fs_gc_control gc_control = {
3095                         .init_gc_type = FG_GC,
3096                         .should_migrate_blocks = true,
3097                         .err_gc_skipped = true,
3098                         .nr_free_secs = 0 };
3099         int ret;
3100 
3101         if (!capable(CAP_SYS_ADMIN))
3102                 return -EPERM;
3103 
3104         if (f2fs_readonly(sbi->sb))
3105                 return -EROFS;
3106 
3107         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3108                 return -EINVAL;
3109 
3110         if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
3111                                                         sizeof(range)))
3112                 return -EFAULT;
3113 
3114         if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
3115                         __is_large_section(sbi)) {
3116                 f2fs_warn(sbi, "Can't flush device %u of %d devices: SEGS_PER_SEC %u != 1",
3117                           range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi));
3118                 return -EINVAL;
3119         }
3120 
3121         ret = mnt_want_write_file(filp);
3122         if (ret)
3123                 return ret;
3124 
3125         if (range.dev_num != 0)
3126                 dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
3127         dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
3128 
3129         start_segno = sm->last_victim[FLUSH_DEVICE];
3130         if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
3131                 start_segno = dev_start_segno;
3132         end_segno = min(start_segno + range.segments, dev_end_segno);
3133 
3134         while (start_segno < end_segno) {
3135                 if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
3136                         ret = -EBUSY;
3137                         goto out;
3138                 }
3139                 sm->last_victim[GC_CB] = end_segno + 1;
3140                 sm->last_victim[GC_GREEDY] = end_segno + 1;
3141                 sm->last_victim[ALLOC_NEXT] = end_segno + 1;
3142 
3143                 gc_control.victim_segno = start_segno;
3144                 stat_inc_gc_call_count(sbi, FOREGROUND);
3145                 ret = f2fs_gc(sbi, &gc_control);
3146                 if (ret == -EAGAIN)
3147                         ret = 0;
3148                 else if (ret < 0)
3149                         break;
3150                 start_segno++;
3151         }
3152 out:
3153         mnt_drop_write_file(filp);
3154         return ret;
3155 }
3156 
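     /*
      * F2FS_IOC_GET_FEATURES: report the superblock feature mask to user
      * space, with the atomic-write bit forced on (see below).
      */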
3157 static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
3158 {
3159         struct inode *inode = file_inode(filp);
3160         u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature);
3161 
3162         /* Always advertise atomic writes; Android's SQLite keys off this feature bit. */
3163         sb_feature |= F2FS_FEATURE_ATOMIC_WRITE;
3164 
3165         return put_user(sb_feature, (u32 __user *)arg);
3166 }
3167 
3168 #ifdef CONFIG_QUOTA
3169 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
3170 {
3171         struct dquot *transfer_to[MAXQUOTAS] = {};
3172         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3173         struct super_block *sb = sbi->sb;
3174         int err;
3175 
3176         transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
3177         if (IS_ERR(transfer_to[PRJQUOTA]))
3178                 return PTR_ERR(transfer_to[PRJQUOTA]);
3179 
3180         err = __dquot_transfer(inode, transfer_to);
3181         if (err)
3182                 set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
3183         dqput(transfer_to[PRJQUOTA]);
3184         return err;
3185 }
3186 
3187 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
3188 {
3189         struct f2fs_inode_info *fi = F2FS_I(inode);
3190         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3191         struct f2fs_inode *ri = NULL;
3192         kprojid_t kprojid;
3193         int err;
3194 
3195         if (!f2fs_sb_has_project_quota(sbi)) {
3196                 if (projid != F2FS_DEF_PROJID)
3197                         return -EOPNOTSUPP;
3198                 else
3199                         return 0;
3200         }
3201 
3202         if (!f2fs_has_extra_attr(inode))
3203                 return -EOPNOTSUPP;
3204 
3205         kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
3206 
3207         if (projid_eq(kprojid, fi->i_projid))
3208                 return 0;
3209 
3210         err = -EPERM;
3211         /* Is it quota file? Do not allow user to mess with it */
3212         if (IS_NOQUOTA(inode))
3213                 return err;
3214 
3215         if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
3216                 return -EOVERFLOW;
3217 
3218         err = f2fs_dquot_initialize(inode);
3219         if (err)
3220                 return err;
3221 
3222         f2fs_lock_op(sbi);
3223         err = f2fs_transfer_project_quota(inode, kprojid);
3224         if (err)
3225                 goto out_unlock;
3226 
3227         fi->i_projid = kprojid;
3228         inode_set_ctime_current(inode);
3229         f2fs_mark_inode_dirty_sync(inode, true);
3230 out_unlock:
3231         f2fs_unlock_op(sbi);
3232         return err;
3233 }
3234 #else
3235 int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
3236 {
3237         return 0;
3238 }
3239 
3240 static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
3241 {
3242         if (projid != F2FS_DEF_PROJID)
3243                 return -EOPNOTSUPP;
3244         return 0;
3245 }
3246 #endif
3247 
3248 int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
3249 {
3250         struct inode *inode = d_inode(dentry);
3251         struct f2fs_inode_info *fi = F2FS_I(inode);
3252         u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
3253 
3254         if (IS_ENCRYPTED(inode))
3255                 fsflags |= FS_ENCRYPT_FL;
3256         if (IS_VERITY(inode))
3257                 fsflags |= FS_VERITY_FL;
3258         if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
3259                 fsflags |= FS_INLINE_DATA_FL;
3260         if (is_inode_flag_set(inode, FI_PIN_FILE))
3261                 fsflags |= FS_NOCOW_FL;
3262 
3263         fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL);
3264 
3265         if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
3266                 fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid);
3267 
3268         return 0;
3269 }
3270 
3271 int f2fs_fileattr_set(struct mnt_idmap *idmap,
3272                       struct dentry *dentry, struct fileattr *fa)
3273 {
3274         struct inode *inode = d_inode(dentry);
3275         u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL;
3276         u32 iflags;
3277         int err;
3278 
3279         if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
3280                 return -EIO;
3281         if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
3282                 return -ENOSPC;
3283         if (fsflags & ~F2FS_GETTABLE_FS_FL)
3284                 return -EOPNOTSUPP;
3285         fsflags &= F2FS_SETTABLE_FS_FL;
3286         if (!fa->flags_valid)
3287                 mask &= FS_COMMON_FL;
3288 
3289         iflags = f2fs_fsflags_to_iflags(fsflags);
3290         if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
3291                 return -EOPNOTSUPP;
3292 
3293         err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask));
3294         if (!err)
3295                 err = f2fs_ioc_setproject(inode, fa->fsx_projid);
3296 
3297         return err;
3298 }
3299 
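     /*
      * Count GC failures against a pinned inode and drop the pin once
      * gc_pin_file_threshold is reached, so a fragmented pinned file
      * cannot block garbage collection indefinitely.
      */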
3300 int f2fs_pin_file_control(struct inode *inode, bool inc)
3301 {
3302         struct f2fs_inode_info *fi = F2FS_I(inode);
3303         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3304 
3305         if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) {
3306                 f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
3307                           __func__, inode->i_ino, fi->i_gc_failures);
3308                 clear_inode_flag(inode, FI_PIN_FILE);
3309                 return -EAGAIN;
3310         }
3311 
3312         /* For a regular file, use i_gc_failures as a risk signal. */
3313         if (inc)
3314                 f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);
3315 
3316         return 0;
3317 }
3318 
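     /*
      * F2FS_IOC_SET_PIN_FILE: pin or unpin a regular file.  Pinning is
      * only allowed while the file has no blocks (-EFBIG otherwise) and,
      * except on zoned devices, only under an in-place-update policy;
      * inline data is converted and compression disabled first.  On
      * success the current GC-failure count is returned.
      */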
3319 static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
3320 {
3321         struct inode *inode = file_inode(filp);
3322         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3323         __u32 pin;
3324         int ret = 0;
3325 
3326         if (get_user(pin, (__u32 __user *)arg))
3327                 return -EFAULT;
3328 
3329         if (!S_ISREG(inode->i_mode))
3330                 return -EINVAL;
3331 
3332         if (f2fs_readonly(sbi->sb))
3333                 return -EROFS;
3334 
3335         ret = mnt_want_write_file(filp);
3336         if (ret)
3337                 return ret;
3338 
3339         inode_lock(inode);
3340 
3341         if (f2fs_is_atomic_file(inode)) {
3342                 ret = -EINVAL;
3343                 goto out;
3344         }
3345 
3346         if (!pin) {
3347                 clear_inode_flag(inode, FI_PIN_FILE);
3348                 f2fs_i_gc_failures_write(inode, 0);
3349                 goto done;
3350         } else if (f2fs_is_pinned_file(inode)) {
3351                 goto done;
3352         }
3353 
3354         if (F2FS_HAS_BLOCKS(inode)) {
3355                 ret = -EFBIG;
3356                 goto out;
3357         }
3358 
3359         /* Allow file pinning on zoned devices even with an out-of-place update policy. */
3360         if (!f2fs_sb_has_blkzoned(sbi) &&
3361             f2fs_should_update_outplace(inode, NULL)) {
3362                 ret = -EINVAL;
3363                 goto out;
3364         }
3365 
3366         if (f2fs_pin_file_control(inode, false)) {
3367                 ret = -EAGAIN;
3368                 goto out;
3369         }
3370 
3371         ret = f2fs_convert_inline_inode(inode);
3372         if (ret)
3373                 goto out;
3374 
3375         if (!f2fs_disable_compressed_file(inode)) {
3376                 ret = -EOPNOTSUPP;
3377                 goto out;
3378         }
3379 
3380         set_inode_flag(inode, FI_PIN_FILE);
3381         ret = F2FS_I(inode)->i_gc_failures;
3382 done:
3383         f2fs_update_time(sbi, REQ_TIME);
3384 out:
3385         inode_unlock(inode);
3386         mnt_drop_write_file(filp);
3387         return ret;
3388 }
3389 
3390 static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
3391 {
3392         struct inode *inode = file_inode(filp);
3393         __u32 pin = 0;
3394 
3395         if (is_inode_flag_set(inode, FI_PIN_FILE))
3396                 pin = F2FS_I(inode)->i_gc_failures;
3397         return put_user(pin, (u32 __user *)arg);
3398 }
3399 
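     /*
      * Populate the extent cache for the whole file by issuing
      * F2FS_GET_BLOCK_PRECACHE mapping requests, advancing extent by
      * extent via m_next_extent until i_size is covered.
      */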
3400 int f2fs_precache_extents(struct inode *inode)
3401 {
3402         struct f2fs_inode_info *fi = F2FS_I(inode);
3403         struct f2fs_map_blocks map;
3404         pgoff_t m_next_extent;
3405         loff_t end;
3406         int err;
3407 
3408         if (is_inode_flag_set(inode, FI_NO_EXTENT))
3409                 return -EOPNOTSUPP;
3410 
3411         map.m_lblk = 0;
3412         map.m_pblk = 0;
3413         map.m_next_pgofs = NULL;
3414         map.m_next_extent = &m_next_extent;
3415         map.m_seg_type = NO_CHECK_TYPE;
3416         map.m_may_create = false;
3417         end = F2FS_BLK_ALIGN(i_size_read(inode));
3418 
3419         while (map.m_lblk < end) {
3420                 map.m_len = end - map.m_lblk;
3421 
3422                 f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
3423                 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE);
3424                 f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
3425                 if (err || !map.m_len)
3426                         return err;
3427 
3428                 map.m_lblk = m_next_extent;
3429         }
3430 
3431         return 0;
3432 }
3433 
3434 static int f2fs_ioc_precache_extents(struct file *filp)
3435 {
3436         return f2fs_precache_extents(file_inode(filp));
3437 }
3438 
3439 static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
3440 {
3441         struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
3442         __u64 block_count;
3443 
3444         if (!capable(CAP_SYS_ADMIN))
3445                 return -EPERM;
3446 
3447         if (f2fs_readonly(sbi->sb))
3448                 return -EROFS;
3449 
3450         if (copy_from_user(&block_count, (void __user *)arg,
3451                            sizeof(block_count)))
3452                 return -EFAULT;
3453 
3454         return f2fs_resize_fs(filp, block_count);
3455 }
3456 
3457 static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
3458 {
3459         struct inode *inode = file_inode(filp);
3460 
3461         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3462 
3463         if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) {
3464                 f2fs_warn(F2FS_I_SB(inode),
3465                           "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem",
3466                           inode->i_ino);
3467                 return -EOPNOTSUPP;
3468         }
3469 
3470         return fsverity_ioctl_enable(filp, (const void __user *)arg);
3471 }
3472 
3473 static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
3474 {
3475         if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
3476                 return -EOPNOTSUPP;
3477 
3478         return fsverity_ioctl_measure(filp, (void __user *)arg);
3479 }
3480 
3481 static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg)
3482 {
3483         if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
3484                 return -EOPNOTSUPP;
3485 
3486         return fsverity_ioctl_read_metadata(filp, (const void __user *)arg);
3487 }
3488 
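     /*
      * FS_IOC_GETFSLABEL: decode the UTF-16LE volume name from the raw
      * superblock to UTF-8 under sb_lock and copy it to user space.
      */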
3489 static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
3490 {
3491         struct inode *inode = file_inode(filp);
3492         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3493         char *vbuf;
3494         int count;
3495         int err = 0;
3496 
3497         vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL);
3498         if (!vbuf)
3499                 return -ENOMEM;
3500 
3501         f2fs_down_read(&sbi->sb_lock);
3502         count = utf16s_to_utf8s(sbi->raw_super->volume_name,
3503                         ARRAY_SIZE(sbi->raw_super->volume_name),
3504                         UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME);
3505         f2fs_up_read(&sbi->sb_lock);
3506 
3507         if (copy_to_user((char __user *)arg, vbuf,
3508                                 min(FSLABEL_MAX, count)))
3509                 err = -EFAULT;
3510 
3511         kfree(vbuf);
3512         return err;
3513 }
3514 
3515 static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
3516 {
3517         struct inode *inode = file_inode(filp);
3518         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3519         char *vbuf;
3520         int err = 0;
3521 
3522         if (!capable(CAP_SYS_ADMIN))
3523                 return -EPERM;
3524 
3525         vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX);
3526         if (IS_ERR(vbuf))
3527                 return PTR_ERR(vbuf);
3528 
3529         err = mnt_want_write_file(filp);
3530         if (err)
3531                 goto out;
3532 
3533         f2fs_down_write(&sbi->sb_lock);
3534 
3535         memset(sbi->raw_super->volume_name, 0,
3536                         sizeof(sbi->raw_super->volume_name));
3537         utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN,
3538                         sbi->raw_super->volume_name,
3539                         ARRAY_SIZE(sbi->raw_super->volume_name));
3540 
3541         err = f2fs_commit_super(sbi, false);
3542 
3543         f2fs_up_write(&sbi->sb_lock);
3544 
3545         mnt_drop_write_file(filp);
3546 out:
3547         kfree(vbuf);
3548         return err;
3549 }
3550 
3551 static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks)
3552 {
3553         if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
3554                 return -EOPNOTSUPP;
3555 
3556         if (!f2fs_compressed_file(inode))
3557                 return -EINVAL;
3558 
3559         *blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
3560 
3561         return 0;
3562 }
3563 
3564 static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg)
3565 {
3566         struct inode *inode = file_inode(filp);
3567         __u64 blocks;
3568         int ret;
3569 
3570         ret = f2fs_get_compress_blocks(inode, &blocks);
3571         if (ret < 0)
3572                 return ret;
3573 
3574         return put_user(blocks, (u64 __user *)arg);
3575 }
3576 
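     /*
      * Give back the space saved by compression for the next @count
      * block addresses: validate every address, then walk cluster by
      * cluster turning reserved NEW_ADDR slots back into NULL_ADDR.
      * Returns the number of blocks released to the free space pool.
      */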
3577 static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
3578 {
3579         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
3580         unsigned int released_blocks = 0;
3581         int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
3582         block_t blkaddr;
3583         int i;
3584 
3585         for (i = 0; i < count; i++) {
3586                 blkaddr = data_blkaddr(dn->inode, dn->node_page,
3587                                                 dn->ofs_in_node + i);
3588 
3589                 if (!__is_valid_data_blkaddr(blkaddr))
3590                         continue;
3591                 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
3592                                         DATA_GENERIC_ENHANCE)))
3593                         return -EFSCORRUPTED;
3594         }
3595 
3596         while (count) {
3597                 int compr_blocks = 0;
3598 
3599                 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
3600                         blkaddr = f2fs_data_blkaddr(dn);
3601 
3602                         if (i == 0) {
3603                                 if (blkaddr == COMPRESS_ADDR)
3604                                         continue;
3605                                 dn->ofs_in_node += cluster_size;
3606                                 goto next;
3607                         }
3608 
3609                         if (__is_valid_data_blkaddr(blkaddr))
3610                                 compr_blocks++;
3611 
3612                         if (blkaddr != NEW_ADDR)
3613                                 continue;
3614 
3615                         f2fs_set_data_blkaddr(dn, NULL_ADDR);
3616                 }
3617 
3618                 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false);
3619                 dec_valid_block_count(sbi, dn->inode,
3620                                         cluster_size - compr_blocks);
3621 
3622                 released_blocks += cluster_size - compr_blocks;
3623 next:
3624                 count -= cluster_size;
3625         }
3626 
3627         return released_blocks;
3628 }
3629 
3630 static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
3631 {
3632         struct inode *inode = file_inode(filp);
3633         struct f2fs_inode_info *fi = F2FS_I(inode);
3634         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3635         pgoff_t page_idx = 0, last_idx;
3636         unsigned int released_blocks = 0;
3637         int ret;
3638         int writecount;
3639 
3640         if (!f2fs_sb_has_compression(sbi))
3641                 return -EOPNOTSUPP;
3642 
3643         if (f2fs_readonly(sbi->sb))
3644                 return -EROFS;
3645 
3646         ret = mnt_want_write_file(filp);
3647         if (ret)
3648                 return ret;
3649 
3650         f2fs_balance_fs(sbi, true);
3651 
3652         inode_lock(inode);
3653 
3654         writecount = atomic_read(&inode->i_writecount);
3655         if ((filp->f_mode & FMODE_WRITE && writecount != 1) ||
3656                         (!(filp->f_mode & FMODE_WRITE) && writecount)) {
3657                 ret = -EBUSY;
3658                 goto out;
3659         }
3660 
3661         if (!f2fs_compressed_file(inode) ||
3662                 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
3663                 ret = -EINVAL;
3664                 goto out;
3665         }
3666 
3667         ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
3668         if (ret)
3669                 goto out;
3670 
3671         if (!atomic_read(&fi->i_compr_blocks)) {
3672                 ret = -EPERM;
3673                 goto out;
3674         }
3675 
3676         set_inode_flag(inode, FI_COMPRESS_RELEASED);
3677         inode_set_ctime_current(inode);
3678         f2fs_mark_inode_dirty_sync(inode, true);
3679 
3680         f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
3681         filemap_invalidate_lock(inode->i_mapping);
3682 
3683         last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
3684 
3685         while (page_idx < last_idx) {
3686                 struct dnode_of_data dn;
3687                 pgoff_t end_offset, count;
3688 
3689                 f2fs_lock_op(sbi);
3690 
3691                 set_new_dnode(&dn, inode, NULL, NULL, 0);
3692                 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
3693                 if (ret) {
3694                         f2fs_unlock_op(sbi);
3695                         if (ret == -ENOENT) {
3696                                 page_idx = f2fs_get_next_page_offset(&dn,
3697                                                                 page_idx);
3698                                 ret = 0;
3699                                 continue;
3700                         }
3701                         break;
3702                 }
3703 
3704                 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
3705                 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
3706                 count = round_up(count, fi->i_cluster_size);
3707 
3708                 ret = release_compress_blocks(&dn, count);
3709 
3710                 f2fs_put_dnode(&dn);
3711 
3712                 f2fs_unlock_op(sbi);
3713 
3714                 if (ret < 0)
3715                         break;
3716 
3717                 page_idx += count;
3718                 released_blocks += ret;
3719         }
3720 
3721         filemap_invalidate_unlock(inode->i_mapping);
3722         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
3723 out:
3724         if (released_blocks)
3725                 f2fs_update_time(sbi, REQ_TIME);
3726         inode_unlock(inode);
3727 
3728         mnt_drop_write_file(filp);
3729 
3730         if (ret >= 0) {
3731                 ret = put_user(released_blocks, (u64 __user *)arg);
3732         } else if (released_blocks &&
3733                         atomic_read(&fi->i_compr_blocks)) {
3734                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3735                 f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
3736                         "iblocks=%llu, released=%u, compr_blocks=%u, "
3737                         "run fsck to fix.",
3738                         __func__, inode->i_ino, inode->i_blocks,
3739                         released_blocks,
3740                         atomic_read(&fi->i_compr_blocks));
3741         }
3742 
3743         return ret;
3744 }
3745 
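     /*
      * Inverse of release_compress_blocks(): re-charge the block quota
      * for previously released compressed clusters and mark the missing
      * slots NEW_ADDR again, accumulating the total in *reserved_blocks.
      */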
3746 static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
3747                 unsigned int *reserved_blocks)
3748 {
3749         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
3750         int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
3751         block_t blkaddr;
3752         int i;
3753 
3754         for (i = 0; i < count; i++) {
3755                 blkaddr = data_blkaddr(dn->inode, dn->node_page,
3756                                                 dn->ofs_in_node + i);
3757 
3758                 if (!__is_valid_data_blkaddr(blkaddr))
3759                         continue;
3760                 if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
3761                                         DATA_GENERIC_ENHANCE)))
3762                         return -EFSCORRUPTED;
3763         }
3764 
3765         while (count) {
3766                 int compr_blocks = 0;
3767                 blkcnt_t reserved = 0;
3768                 blkcnt_t to_reserved;
3769                 int ret;
3770 
3771                 for (i = 0; i < cluster_size; i++) {
3772                         blkaddr = data_blkaddr(dn->inode, dn->node_page,
3773                                                 dn->ofs_in_node + i);
3774 
3775                         if (i == 0) {
3776                                 if (blkaddr != COMPRESS_ADDR) {
3777                                         dn->ofs_in_node += cluster_size;
3778                                         goto next;
3779                                 }
3780                                 continue;
3781                         }
3782 
3783                         /*
3784                          * The compressed cluster may not have been released
3785                          * because release_compress_blocks() failed part way
3786                          * through, so NEW_ADDR is a possible case.
3787                          */
3788                         if (blkaddr == NEW_ADDR) {
3789                                 reserved++;
3790                                 continue;
3791                         }
3792                         if (__is_valid_data_blkaddr(blkaddr)) {
3793                                 compr_blocks++;
3794                                 continue;
3795                         }
3796                 }
3797 
3798                 to_reserved = cluster_size - compr_blocks - reserved;
3799 
3800                 /* all data blocks in the cluster are valid or already reserved */
3801                 if (to_reserved == 1) {
3802                         dn->ofs_in_node += cluster_size;
3803                         goto next;
3804                 }
3805 
3806                 ret = inc_valid_block_count(sbi, dn->inode,
3807                                                 &to_reserved, false);
3808                 if (unlikely(ret))
3809                         return ret;
3810 
3811                 for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
3812                         if (f2fs_data_blkaddr(dn) == NULL_ADDR)
3813                                 f2fs_set_data_blkaddr(dn, NEW_ADDR);
3814                 }
3815 
3816                 f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
3817 
3818                 *reserved_blocks += to_reserved;
3819 next:
3820                 count -= cluster_size;
3821         }
3822 
3823         return 0;
3824 }
3825 
3826 static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
3827 {
3828         struct inode *inode = file_inode(filp);
3829         struct f2fs_inode_info *fi = F2FS_I(inode);
3830         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3831         pgoff_t page_idx = 0, last_idx;
3832         unsigned int reserved_blocks = 0;
3833         int ret;
3834 
3835         if (!f2fs_sb_has_compression(sbi))
3836                 return -EOPNOTSUPP;
3837 
3838         if (f2fs_readonly(sbi->sb))
3839                 return -EROFS;
3840 
3841         ret = mnt_want_write_file(filp);
3842         if (ret)
3843                 return ret;
3844 
3845         f2fs_balance_fs(sbi, true);
3846 
3847         inode_lock(inode);
3848 
3849         if (!f2fs_compressed_file(inode) ||
3850                 !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
3851                 ret = -EINVAL;
3852                 goto unlock_inode;
3853         }
3854 
3855         if (atomic_read(&fi->i_compr_blocks))
3856                 goto unlock_inode;
3857 
3858         f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
3859         filemap_invalidate_lock(inode->i_mapping);
3860 
3861         last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
3862 
3863         while (page_idx < last_idx) {
3864                 struct dnode_of_data dn;
3865                 pgoff_t end_offset, count;
3866 
3867                 f2fs_lock_op(sbi);
3868 
3869                 set_new_dnode(&dn, inode, NULL, NULL, 0);
3870                 ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
3871                 if (ret) {
3872                         f2fs_unlock_op(sbi);
3873                         if (ret == -ENOENT) {
3874                                 page_idx = f2fs_get_next_page_offset(&dn,
3875                                                                 page_idx);
3876                                 ret = 0;
3877                                 continue;
3878                         }
3879                         break;
3880                 }
3881 
3882                 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
3883                 count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
3884                 count = round_up(count, fi->i_cluster_size);
3885 
3886                 ret = reserve_compress_blocks(&dn, count, &reserved_blocks);
3887 
3888                 f2fs_put_dnode(&dn);
3889 
3890                 f2fs_unlock_op(sbi);
3891 
3892                 if (ret < 0)
3893                         break;
3894 
3895                 page_idx += count;
3896         }
3897 
3898         filemap_invalidate_unlock(inode->i_mapping);
3899         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
3900 
3901         if (!ret) {
3902                 clear_inode_flag(inode, FI_COMPRESS_RELEASED);
3903                 inode_set_ctime_current(inode);
3904                 f2fs_mark_inode_dirty_sync(inode, true);
3905         }
3906 unlock_inode:
3907         if (reserved_blocks)
3908                 f2fs_update_time(sbi, REQ_TIME);
3909         inode_unlock(inode);
3910         mnt_drop_write_file(filp);
3911 
3912         if (!ret) {
3913                 ret = put_user(reserved_blocks, (u64 __user *)arg);
3914         } else if (reserved_blocks &&
3915                         atomic_read(&fi->i_compr_blocks)) {
3916                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3917                 f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx "
3918                         "iblocks=%llu, reserved=%u, compr_blocks=%u, "
3919                         "run fsck to fix.",
3920                         __func__, inode->i_ino, inode->i_blocks,
3921                         reserved_blocks,
3922                         atomic_read(&fi->i_compr_blocks));
3923         }
3924 
3925         return ret;
3926 }
3927 
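     /*
      * Erase one contiguous block run on @bdev: F2FS_TRIM_FILE_DISCARD
      * prefers a hardware secure erase and falls back to discard, while
      * F2FS_TRIM_FILE_ZEROOUT zeroes the range, going through
      * fscrypt_zeroout_range() for encrypted inodes.
      */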
3928 static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
3929                 pgoff_t off, block_t block, block_t len, u32 flags)
3930 {
3931         sector_t sector = SECTOR_FROM_BLOCK(block);
3932         sector_t nr_sects = SECTOR_FROM_BLOCK(len);
3933         int ret = 0;
3934 
3935         if (flags & F2FS_TRIM_FILE_DISCARD) {
3936                 if (bdev_max_secure_erase_sectors(bdev))
3937                         ret = blkdev_issue_secure_erase(bdev, sector, nr_sects,
3938                                         GFP_NOFS);
3939                 else
3940                         ret = blkdev_issue_discard(bdev, sector, nr_sects,
3941                                         GFP_NOFS);
3942         }
3943 
3944         if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
3945                 if (IS_ENCRYPTED(inode))
3946                         ret = fscrypt_zeroout_range(inode, off, block, len);
3947                 else
3948                         ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
3949                                         GFP_NOFS, 0);
3950         }
3951 
3952         return ret;
3953 }
3954 
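     /*
      * F2FS_IOC_SEC_TRIM_FILE: securely erase the on-disk blocks backing
      * a block-aligned range of a regular file.  The range is written
      * back and dropped from the page cache, then physically contiguous
      * runs on the same device are batched into f2fs_secure_erase().
      */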
3955 static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
3956 {
3957         struct inode *inode = file_inode(filp);
3958         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3959         struct address_space *mapping = inode->i_mapping;
3960         struct block_device *prev_bdev = NULL;
3961         struct f2fs_sectrim_range range;
3962         pgoff_t index, pg_end, prev_index = 0;
3963         block_t prev_block = 0, len = 0;
3964         loff_t end_addr;
3965         bool to_end = false;
3966         int ret = 0;
3967 
3968         if (!(filp->f_mode & FMODE_WRITE))
3969                 return -EBADF;
3970 
3971         if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg,
3972                                 sizeof(range)))
3973                 return -EFAULT;
3974 
3975         if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) ||
3976                         !S_ISREG(inode->i_mode))
3977                 return -EINVAL;
3978 
3979         if (((range.flags & F2FS_TRIM_FILE_DISCARD) &&
3980                         !f2fs_hw_support_discard(sbi)) ||
3981                         ((range.flags & F2FS_TRIM_FILE_ZEROOUT) &&
3982                          IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi)))
3983                 return -EOPNOTSUPP;
3984 
3985         ret = mnt_want_write_file(filp);
3986         if (ret)
3987                 return ret;
3988         inode_lock(inode);
3989 
3990         if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) ||
3991                         range.start >= inode->i_size) {
3992                 ret = -EINVAL;
3993                 goto err;
3994         }
3995 
3996         if (range.len == 0)
3997                 goto err;
3998 
3999         if (inode->i_size - range.start > range.len) {
4000                 end_addr = range.start + range.len;
4001         } else {
4002                 end_addr = range.len == (u64)-1 ?
4003                         sbi->sb->s_maxbytes : inode->i_size;
4004                 to_end = true;
4005         }
4006 
4007         if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) ||
4008                         (!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) {
4009                 ret = -EINVAL;
4010                 goto err;
4011         }
4012 
4013         index = F2FS_BYTES_TO_BLK(range.start);
4014         pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE);
4015 
4016         ret = f2fs_convert_inline_inode(inode);
4017         if (ret)
4018                 goto err;
4019 
4020         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
4021         filemap_invalidate_lock(mapping);
4022 
4023         ret = filemap_write_and_wait_range(mapping, range.start,
4024                         to_end ? LLONG_MAX : end_addr - 1);
4025         if (ret)
4026                 goto out;
4027 
4028         truncate_inode_pages_range(mapping, range.start,
4029                         to_end ? -1 : end_addr - 1);
4030 
4031         while (index < pg_end) {
4032                 struct dnode_of_data dn;
4033                 pgoff_t end_offset, count;
4034                 int i;
4035 
4036                 set_new_dnode(&dn, inode, NULL, NULL, 0);
4037                 ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
4038                 if (ret) {
4039                         if (ret == -ENOENT) {
4040                                 index = f2fs_get_next_page_offset(&dn, index);
4041                                 continue;
4042                         }
4043                         goto out;
4044                 }
4045 
4046                 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
4047                 count = min(end_offset - dn.ofs_in_node, pg_end - index);
4048                 for (i = 0; i < count; i++, index++, dn.ofs_in_node++) {
4049                         struct block_device *cur_bdev;
4050                         block_t blkaddr = f2fs_data_blkaddr(&dn);
4051 
4052                         if (!__is_valid_data_blkaddr(blkaddr))
4053                                 continue;
4054 
4055                         if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
4056                                                 DATA_GENERIC_ENHANCE)) {
4057                                 ret = -EFSCORRUPTED;
4058                                 f2fs_put_dnode(&dn);
4059                                 goto out;
4060                         }
4061 
4062                         cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
4063                         if (f2fs_is_multi_device(sbi)) {
4064                                 int di = f2fs_target_device_index(sbi, blkaddr);
4065 
4066                                 blkaddr -= FDEV(di).start_blk;
4067                         }
4068 
4069                         if (len) {
4070                                 if (prev_bdev == cur_bdev &&
4071                                                 index == prev_index + len &&
4072                                                 blkaddr == prev_block + len) {
4073                                         len++;
4074                                 } else {
4075                                         ret = f2fs_secure_erase(prev_bdev,
4076                                                 inode, prev_index, prev_block,
4077                                                 len, range.flags);
4078                                         if (ret) {
4079                                                 f2fs_put_dnode(&dn);
4080                                                 goto out;
4081                                         }
4082 
4083                                         len = 0;
4084                                 }
4085                         }
4086 
4087                         if (!len) {
4088                                 prev_bdev = cur_bdev;
4089                                 prev_index = index;
4090                                 prev_block = blkaddr;
4091                                 len = 1;
4092                         }
4093                 }
4094 
4095                 f2fs_put_dnode(&dn);
4096 
4097                 if (fatal_signal_pending(current)) {
4098                         ret = -EINTR;
4099                         goto out;
4100                 }
4101                 cond_resched();
4102         }
4103 
4104         if (len)
4105                 ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
4106                                 prev_block, len, range.flags);
4107         f2fs_update_time(sbi, REQ_TIME);
4108 out:
4109         filemap_invalidate_unlock(mapping);
4110         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
4111 err:
4112         inode_unlock(inode);
4113         mnt_drop_write_file(filp);
4114 
4115         return ret;
4116 }
4117 
4118 static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
4119 {
4120         struct inode *inode = file_inode(filp);
4121         struct f2fs_comp_option option;
4122 
4123         if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
4124                 return -EOPNOTSUPP;
4125 
4126         inode_lock_shared(inode);
4127 
4128         if (!f2fs_compressed_file(inode)) {
4129                 inode_unlock_shared(inode);
4130                 return -ENODATA;
4131         }
4132 
4133         option.algorithm = F2FS_I(inode)->i_compress_algorithm;
4134         option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
4135 
4136         inode_unlock_shared(inode);
4137 
4138         if (copy_to_user((struct f2fs_comp_option __user *)arg, &option,
4139                                 sizeof(option)))
4140                 return -EFAULT;
4141 
4142         return 0;
4143 }
4144 
4145 static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
4146 {
4147         struct inode *inode = file_inode(filp);
4148         struct f2fs_inode_info *fi = F2FS_I(inode);
4149         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4150         struct f2fs_comp_option option;
4151         int ret = 0;
4152 
4153         if (!f2fs_sb_has_compression(sbi))
4154                 return -EOPNOTSUPP;
4155 
4156         if (!(filp->f_mode & FMODE_WRITE))
4157                 return -EBADF;
4158 
4159         if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg,
4160                                 sizeof(option)))
4161                 return -EFAULT;
4162 
4163         if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
4164                 option.log_cluster_size > MAX_COMPRESS_LOG_SIZE ||
4165                 option.algorithm >= COMPRESS_MAX)
4166                 return -EINVAL;
4167 
4168         ret = mnt_want_write_file(filp);
4169         if (ret)
4170                 return ret;
4171         inode_lock(inode);
4172 
4173         f2fs_down_write(&F2FS_I(inode)->i_sem);
4174         if (!f2fs_compressed_file(inode)) {
4175                 ret = -EINVAL;
4176                 goto out;
4177         }
4178 
4179         if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) {
4180                 ret = -EBUSY;
4181                 goto out;
4182         }
4183 
4184         if (F2FS_HAS_BLOCKS(inode)) {
4185                 ret = -EFBIG;
4186                 goto out;
4187         }
4188 
4189         fi->i_compress_algorithm = option.algorithm;
4190         fi->i_log_cluster_size = option.log_cluster_size;
4191         fi->i_cluster_size = BIT(option.log_cluster_size);
4192         /* Set default level */
4193         if (fi->i_compress_algorithm == COMPRESS_ZSTD)
4194                 fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
4195         else
4196                 fi->i_compress_level = 0;
4197         /* Prefer the compress level from the mount option when the algorithm matches */
4198         if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm &&
4199             F2FS_OPTION(sbi).compress_level)
4200                 fi->i_compress_level = F2FS_OPTION(sbi).compress_level;
4201         f2fs_mark_inode_dirty_sync(inode, true);
4202 
4203         if (!f2fs_is_compress_backend_ready(inode))
4204                 f2fs_warn(sbi, "compression algorithm is successfully set, "
4205                         "but current kernel doesn't support this algorithm.");
4206 out:
4207         f2fs_up_write(&fi->i_sem);
4208         inode_unlock(inode);
4209         mnt_drop_write_file(filp);
4210 
4211         return ret;
4212 }
4213 
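     /*
      * Helper for the (de)compress ioctls: read @len pages at @page_idx
      * into the page cache, then lock each one and redirty it with the
      * GC-ing private flag so writeback rewrites the whole cluster.
      */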
4214 static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
4215 {
4216         DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx);
4217         struct address_space *mapping = inode->i_mapping;
4218         struct page *page;
4219         pgoff_t redirty_idx = page_idx;
4220         int i, page_len = 0, ret = 0;
4221 
4222         page_cache_ra_unbounded(&ractl, len, 0);
4223 
4224         for (i = 0; i < len; i++, page_idx++) {
4225                 page = read_cache_page(mapping, page_idx, NULL, NULL);
4226                 if (IS_ERR(page)) {
4227                         ret = PTR_ERR(page);
4228                         break;
4229                 }
4230                 page_len++;
4231         }
4232 
4233         for (i = 0; i < page_len; i++, redirty_idx++) {
4234                 page = find_lock_page(mapping, redirty_idx);
4235 
4236                 /* It can never fail, since the page was pinned by read_cache_page() above */
4237                 f2fs_bug_on(F2FS_I_SB(inode), !page);
4238 
4239                 f2fs_wait_on_page_writeback(page, DATA, true, true);
4240 
4241                 set_page_dirty(page);
4242                 set_page_private_gcing(page);
4243                 f2fs_put_page(page, 1);        /* lock + ref from find_lock_page() */
4244                 f2fs_put_page(page, 0);        /* ref from read_cache_page() */
4245         }
4246 
4247         return ret;
4248 }
4249 
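     /*
      * F2FS_IOC_DECOMPRESS_FILE (COMPR_MODE_USER only): redirty every
      * compressed cluster and flush it without FI_ENABLE_COMPRESS set,
      * so writeback stores the data uncompressed.  Dirty pages are
      * flushed periodically to bound memory use.
      */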
4250 static int f2fs_ioc_decompress_file(struct file *filp)
4251 {
4252         struct inode *inode = file_inode(filp);
4253         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4254         struct f2fs_inode_info *fi = F2FS_I(inode);
4255         pgoff_t page_idx = 0, last_idx, cluster_idx;
4256         int ret;
4257 
4258         if (!f2fs_sb_has_compression(sbi) ||
4259                         F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
4260                 return -EOPNOTSUPP;
4261 
4262         if (!(filp->f_mode & FMODE_WRITE))
4263                 return -EBADF;
4264 
4265         f2fs_balance_fs(sbi, true);
4266 
4267         ret = mnt_want_write_file(filp);
4268         if (ret)
4269                 return ret;
4270         inode_lock(inode);
4271 
4272         if (!f2fs_is_compress_backend_ready(inode)) {
4273                 ret = -EOPNOTSUPP;
4274                 goto out;
4275         }
4276 
4277         if (!f2fs_compressed_file(inode) ||
4278                 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4279                 ret = -EINVAL;
4280                 goto out;
4281         }
4282 
4283         ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
4284         if (ret)
4285                 goto out;
4286 
4287         if (!atomic_read(&fi->i_compr_blocks))
4288                 goto out;
4289 
4290         last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4291         last_idx >>= fi->i_log_cluster_size;
4292 
4293         for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
4294                 page_idx = cluster_idx << fi->i_log_cluster_size;
4295 
4296                 if (!f2fs_is_compressed_cluster(inode, page_idx))
4297                         continue;
4298 
4299                 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
4300                 if (ret < 0)
4301                         break;
4302 
4303                 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
4304                         ret = filemap_fdatawrite(inode->i_mapping);
4305                         if (ret < 0)
4306                                 break;
4307                 }
4308 
4309                 cond_resched();
4310                 if (fatal_signal_pending(current)) {
4311                         ret = -EINTR;
4312                         break;
4313                 }
4314         }
4315 
4316         if (!ret)
4317                 ret = filemap_write_and_wait_range(inode->i_mapping, 0,
4318                                                         LLONG_MAX);
4319 
4320         if (ret)
4321                 f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.",
4322                           __func__, ret);
4323         f2fs_update_time(sbi, REQ_TIME);
4324 out:
4325         inode_unlock(inode);
4326         mnt_drop_write_file(filp);
4327 
4328         return ret;
4329 }
4330 
4331 static int f2fs_ioc_compress_file(struct file *filp)
4332 {
4333         struct inode *inode = file_inode(filp);
4334         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4335         struct f2fs_inode_info *fi = F2FS_I(inode);
4336         pgoff_t page_idx = 0, last_idx, cluster_idx;
4337         int ret;
4338 
4339         if (!f2fs_sb_has_compression(sbi) ||
4340                         F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
4341                 return -EOPNOTSUPP;
4342 
4343         if (!(filp->f_mode & FMODE_WRITE))
4344                 return -EBADF;
4345 
4346         f2fs_balance_fs(sbi, true);
4347 
4348         ret = mnt_want_write_file(filp);
4349         if (ret)
4350                 return ret;
4351         inode_lock(inode);
4352 
4353         if (!f2fs_is_compress_backend_ready(inode)) {
4354                 ret = -EOPNOTSUPP;
4355                 goto out;
4356         }
4357 
4358         if (!f2fs_compressed_file(inode) ||
4359                 is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
4360                 ret = -EINVAL;
4361                 goto out;
4362         }
4363 
4364         ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
4365         if (ret)
4366                 goto out;
4367 
4368         set_inode_flag(inode, FI_ENABLE_COMPRESS);
4369 
4370         last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
4371         last_idx >>= fi->i_log_cluster_size;
4372 
4373         for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) {
4374                 page_idx = cluster_idx << fi->i_log_cluster_size;
4375 
4376                 if (f2fs_is_sparse_cluster(inode, page_idx))
4377                         continue;
4378 
4379                 ret = redirty_blocks(inode, page_idx, fi->i_cluster_size);
4380                 if (ret < 0)
4381                         break;
4382 
4383                 if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
4384                         ret = filemap_fdatawrite(inode->i_mapping);
4385                         if (ret < 0)
4386                                 break;
4387                 }
4388 
4389                 cond_resched();
4390                 if (fatal_signal_pending(current)) {
4391                         ret = -EINTR;
4392                         break;
4393                 }
4394         }
4395 
4396         if (!ret)
4397                 ret = filemap_write_and_wait_range(inode->i_mapping, 0,
4398                                                         LLONG_MAX);
4399 
4400         clear_inode_flag(inode, FI_ENABLE_COMPRESS);
4401 
4402         if (ret)
4403                 f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.",
4404                           __func__, ret);
4405         f2fs_update_time(sbi, REQ_TIME);
4406 out:
4407         inode_unlock(inode);
4408         mnt_drop_write_file(filp);
4409 
4410         return ret;
4411 }
4412 
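     /*
      * Common ioctl dispatch; the checkpoint-error and checkpoint-ready
      * checks are applied once in the f2fs_ioctl() wrapper below.
      */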
4413 static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
4414 {
4415         switch (cmd) {
4416         case FS_IOC_GETVERSION:
4417                 return f2fs_ioc_getversion(filp, arg);
4418         case F2FS_IOC_START_ATOMIC_WRITE:
4419                 return f2fs_ioc_start_atomic_write(filp, false);
4420         case F2FS_IOC_START_ATOMIC_REPLACE:
4421                 return f2fs_ioc_start_atomic_write(filp, true);
4422         case F2FS_IOC_COMMIT_ATOMIC_WRITE:
4423                 return f2fs_ioc_commit_atomic_write(filp);
4424         case F2FS_IOC_ABORT_ATOMIC_WRITE:
4425                 return f2fs_ioc_abort_atomic_write(filp);
4426         case F2FS_IOC_START_VOLATILE_WRITE:
4427         case F2FS_IOC_RELEASE_VOLATILE_WRITE:
4428                 return -EOPNOTSUPP;
4429         case F2FS_IOC_SHUTDOWN:
4430                 return f2fs_ioc_shutdown(filp, arg);
4431         case FITRIM:
4432                 return f2fs_ioc_fitrim(filp, arg);
4433         case FS_IOC_SET_ENCRYPTION_POLICY:
4434                 return f2fs_ioc_set_encryption_policy(filp, arg);
4435         case FS_IOC_GET_ENCRYPTION_POLICY:
4436                 return f2fs_ioc_get_encryption_policy(filp, arg);
4437         case FS_IOC_GET_ENCRYPTION_PWSALT:
4438                 return f2fs_ioc_get_encryption_pwsalt(filp, arg);
4439         case FS_IOC_GET_ENCRYPTION_POLICY_EX:
4440                 return f2fs_ioc_get_encryption_policy_ex(filp, arg);
4441         case FS_IOC_ADD_ENCRYPTION_KEY:
4442                 return f2fs_ioc_add_encryption_key(filp, arg);
4443         case FS_IOC_REMOVE_ENCRYPTION_KEY:
4444                 return f2fs_ioc_remove_encryption_key(filp, arg);
4445         case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
4446                 return f2fs_ioc_remove_encryption_key_all_users(filp, arg);
4447         case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
4448                 return f2fs_ioc_get_encryption_key_status(filp, arg);
4449         case FS_IOC_GET_ENCRYPTION_NONCE:
4450                 return f2fs_ioc_get_encryption_nonce(filp, arg);
4451         case F2FS_IOC_GARBAGE_COLLECT:
4452                 return f2fs_ioc_gc(filp, arg);
4453         case F2FS_IOC_GARBAGE_COLLECT_RANGE:
4454                 return f2fs_ioc_gc_range(filp, arg);
4455         case F2FS_IOC_WRITE_CHECKPOINT:
4456                 return f2fs_ioc_write_checkpoint(filp);
4457         case F2FS_IOC_DEFRAGMENT:
4458                 return f2fs_ioc_defragment(filp, arg);
4459         case F2FS_IOC_MOVE_RANGE:
4460                 return f2fs_ioc_move_range(filp, arg);
4461         case F2FS_IOC_FLUSH_DEVICE:
4462                 return f2fs_ioc_flush_device(filp, arg);
4463         case F2FS_IOC_GET_FEATURES:
4464                 return f2fs_ioc_get_features(filp, arg);
4465         case F2FS_IOC_GET_PIN_FILE:
4466                 return f2fs_ioc_get_pin_file(filp, arg);
4467         case F2FS_IOC_SET_PIN_FILE:
4468                 return f2fs_ioc_set_pin_file(filp, arg);
4469         case F2FS_IOC_PRECACHE_EXTENTS:
4470                 return f2fs_ioc_precache_extents(filp);
4471         case F2FS_IOC_RESIZE_FS:
4472                 return f2fs_ioc_resize_fs(filp, arg);
4473         case FS_IOC_ENABLE_VERITY:
4474                 return f2fs_ioc_enable_verity(filp, arg);
4475         case FS_IOC_MEASURE_VERITY:
4476                 return f2fs_ioc_measure_verity(filp, arg);
4477         case FS_IOC_READ_VERITY_METADATA:
4478                 return f2fs_ioc_read_verity_metadata(filp, arg);
4479         case FS_IOC_GETFSLABEL:
4480                 return f2fs_ioc_getfslabel(filp, arg);
4481         case FS_IOC_SETFSLABEL:
4482                 return f2fs_ioc_setfslabel(filp, arg);
4483         case F2FS_IOC_GET_COMPRESS_BLOCKS:
4484                 return f2fs_ioc_get_compress_blocks(filp, arg);
4485         case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
4486                 return f2fs_release_compress_blocks(filp, arg);
4487         case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
4488                 return f2fs_reserve_compress_blocks(filp, arg);
4489         case F2FS_IOC_SEC_TRIM_FILE:
4490                 return f2fs_sec_trim_file(filp, arg);
4491         case F2FS_IOC_GET_COMPRESS_OPTION:
4492                 return f2fs_ioc_get_compress_option(filp, arg);
4493         case F2FS_IOC_SET_COMPRESS_OPTION:
4494                 return f2fs_ioc_set_compress_option(filp, arg);
4495         case F2FS_IOC_DECOMPRESS_FILE:
4496                 return f2fs_ioc_decompress_file(filp);
4497         case F2FS_IOC_COMPRESS_FILE:
4498                 return f2fs_ioc_compress_file(filp);
4499         default:
4500                 return -ENOTTY;
4501         }
4502 }
4503 
4504 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
4505 {
4506         if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
4507                 return -EIO;
4508         if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
4509                 return -ENOSPC;
4510 
4511         return __f2fs_ioctl(filp, cmd, arg);
4512 }
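
For context, a minimal userspace sketch (not part of the kernel source) of driving the atomic-write commands dispatched above. The path is illustrative and error handling is abbreviated; the F2FS_IOC_* numbers come from the same <uapi/linux/f2fs.h> header this file includes.

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/f2fs.h>                 /* F2FS_IOC_* command numbers */

    int main(void)
    {
            int fd = open("/mnt/f2fs/db.bin", O_RDWR);  /* illustrative path */

            if (fd < 0)
                    return 1;
            /* Stage subsequent writes; f2fs applies them all-or-nothing. */
            if (ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE) < 0)
                    goto fail;
            /* ... pwrite() the updated data here ... */
            if (ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE) < 0)
                    goto fail;
            close(fd);
            return 0;
    fail:
            ioctl(fd, F2FS_IOC_ABORT_ATOMIC_WRITE);
            close(fd);
            return 1;
    }
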
4513 
4514 /*
4515  * Return %true if the given read or write request should use direct I/O, or
4516  * %false if it should use buffered I/O.
4517  */
4518 static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
4519                                 struct iov_iter *iter)
4520 {
4521         unsigned int align;
4522 
4523         if (!(iocb->ki_flags & IOCB_DIRECT))
4524                 return false;
4525 
4526         if (f2fs_force_buffered_io(inode, iov_iter_rw(iter)))
4527                 return false;
4528 
4529         /*
4530          * Direct I/O not aligned to the disk's logical_block_size will be
4531          * attempted, but will fail with -EINVAL.
4532          *
4533          * f2fs additionally requires that direct I/O be aligned to the
4534          * filesystem block size, which is often a stricter requirement.
4535          * However, f2fs traditionally falls back to buffered I/O on requests
4536          * that are logical_block_size-aligned but not fs-block aligned.
4537          *
4538          * The below logic implements this behavior.
4539          */
4540         align = iocb->ki_pos | iov_iter_alignment(iter);
4541         if (!IS_ALIGNED(align, i_blocksize(inode)) &&
4542             IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
4543                 return false;
4544 
4545         return true;
4546 }
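
A hedged userspace illustration of the fallback described in the comment above, assuming a device with a 512-byte logical block size and a 4 KiB filesystem block size; the path is illustrative. The request below is logical-block aligned but not fs-block aligned, so it is quietly served as buffered I/O rather than rejected:

    #define _GNU_SOURCE                     /* for O_DIRECT */
    #include <fcntl.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/mnt/f2fs/testfile", O_RDWR | O_CREAT | O_DIRECT, 0644);
            void *buf;
            ssize_t n;

            if (fd < 0 || posix_memalign(&buf, 512, 512))
                    return 1;
            memset(buf, 0xab, 512);
            /*
             * Offset, length and buffer address are 512-byte aligned but
             * not 4 KiB aligned: on a 512-byte logical-block device,
             * f2fs_should_use_dio() returns false and the request is
             * served through the page cache instead of failing -EINVAL.
             */
            n = pwrite(fd, buf, 512, 512);
            free(buf);
            close(fd);
            return n == 512 ? 0 : 1;
    }
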
4547 
4548 static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
4549                                 unsigned int flags)
4550 {
4551         struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
4552 
4553         dec_page_count(sbi, F2FS_DIO_READ);
4554         if (error)
4555                 return error;
4556         f2fs_update_iostat(sbi, NULL, APP_DIRECT_READ_IO, size);
4557         return 0;
4558 }
4559 
4560 static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
4561         .end_io = f2fs_dio_read_end_io,
4562 };
4563 
4564 static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
4565 {
4566         struct file *file = iocb->ki_filp;
4567         struct inode *inode = file_inode(file);
4568         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4569         struct f2fs_inode_info *fi = F2FS_I(inode);
4570         const loff_t pos = iocb->ki_pos;
4571         const size_t count = iov_iter_count(to);
4572         struct iomap_dio *dio;
4573         ssize_t ret;
4574 
4575         if (count == 0)
4576                 return 0; /* skip atime update */
4577 
4578         trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
4579 
4580         if (iocb->ki_flags & IOCB_NOWAIT) {
4581                 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
4582                         ret = -EAGAIN;
4583                         goto out;
4584                 }
4585         } else {
4586                 f2fs_down_read(&fi->i_gc_rwsem[READ]);
4587         }
4588 
4589         /*
4590          * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
4591          * the higher-level function iomap_dio_rw() in order to ensure that the
4592          * F2FS_DIO_READ counter will be decremented correctly in all cases.
4593          */
4594         inc_page_count(sbi, F2FS_DIO_READ);
4595         dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
4596                              &f2fs_iomap_dio_read_ops, 0, NULL, 0);
4597         if (IS_ERR_OR_NULL(dio)) {
4598                 ret = PTR_ERR_OR_ZERO(dio);
4599                 if (ret != -EIOCBQUEUED)
4600                         dec_page_count(sbi, F2FS_DIO_READ);
4601         } else {
4602                 ret = iomap_dio_complete(dio);
4603         }
4604 
4605         f2fs_up_read(&fi->i_gc_rwsem[READ]);
4606 
4607         file_accessed(file);
4608 out:
4609         trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
4610         return ret;
4611 }
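
The IOCB_NOWAIT branch above is what a preadv2() call with RWF_NOWAIT lands on. A hedged sketch (path illustrative) of how userspace sees the trylock failure as -EAGAIN and can fall back to a blocking read:

    #define _GNU_SOURCE                     /* for preadv2() and RWF_NOWAIT */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdlib.h>
    #include <sys/uio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/mnt/f2fs/data.bin", O_RDONLY | O_DIRECT);
            struct iovec iov;
            void *buf;
            ssize_t n;

            if (fd < 0 || posix_memalign(&buf, 4096, 4096))
                    return 1;
            iov.iov_base = buf;
            iov.iov_len = 4096;
            /*
             * RWF_NOWAIT becomes IOCB_NOWAIT: f2fs_dio_read_iter() only
             * trylocks i_gc_rwsem[READ], so if GC holds the semaphore the
             * call returns -EAGAIN immediately instead of sleeping.
             */
            n = preadv2(fd, &iov, 1, 0, RWF_NOWAIT);
            if (n < 0 && errno == EAGAIN)
                    n = preadv2(fd, &iov, 1, 0, 0);  /* blocking retry */
            free(buf);
            close(fd);
            return n < 0;
    }
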
4612 
4613 static void f2fs_trace_rw_file_path(struct file *file, loff_t pos, size_t count,
4614                                     int rw)
4615 {
4616         struct inode *inode = file_inode(file);
4617         char *buf, *path;
4618 
4619         buf = f2fs_getname(F2FS_I_SB(inode));
4620         if (!buf)
4621                 return;
4622         path = dentry_path_raw(file_dentry(file), buf, PATH_MAX);
4623         if (IS_ERR(path))
4624                 goto free_buf;
4625         if (rw == WRITE)
4626                 trace_f2fs_datawrite_start(inode, pos, count,
4627                                 current->pid, path, current->comm);
4628         else
4629                 trace_f2fs_dataread_start(inode, pos, count,
4630                                 current->pid, path, current->comm);
4631 free_buf:
4632         f2fs_putname(buf);
4633 }
4634 
4635 static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
4636 {
4637         struct inode *inode = file_inode(iocb->ki_filp);
4638         const loff_t pos = iocb->ki_pos;
4639         ssize_t ret;
4640 
4641         if (!f2fs_is_compress_backend_ready(inode))
4642                 return -EOPNOTSUPP;
4643 
4644         if (trace_f2fs_dataread_start_enabled())
4645                 f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
4646                                         iov_iter_count(to), READ);
4647 
4648         /* In LFS mode, if there is in-flight dio, wait for its completion. */
4649         if (f2fs_lfs_mode(F2FS_I_SB(inode)))
4650                 inode_dio_wait(inode);
4651 
4652         if (f2fs_should_use_dio(inode, iocb, to)) {
4653                 ret = f2fs_dio_read_iter(iocb, to);
4654         } else {
4655                 ret = filemap_read(iocb, to, 0);
4656                 if (ret > 0)
4657                         f2fs_update_iostat(F2FS_I_SB(inode), inode,
4658                                                 APP_BUFFERED_READ_IO, ret);
4659         }
4660         if (trace_f2fs_dataread_end_enabled())
4661                 trace_f2fs_dataread_end(inode, pos, ret);
4662         return ret;
4663 }
4664 
4665 static ssize_t f2fs_file_splice_read(struct file *in, loff_t *ppos,
4666                                      struct pipe_inode_info *pipe,
4667                                      size_t len, unsigned int flags)
4668 {
4669         struct inode *inode = file_inode(in);
4670         const loff_t pos = *ppos;
4671         ssize_t ret;
4672 
4673         if (!f2fs_is_compress_backend_ready(inode))
4674                 return -EOPNOTSUPP;
4675 
4676         if (trace_f2fs_dataread_start_enabled())
4677                 f2fs_trace_rw_file_path(in, pos, len, READ);
4678 
4679         ret = filemap_splice_read(in, ppos, pipe, len, flags);
4680         if (ret > 0)
4681                 f2fs_update_iostat(F2FS_I_SB(inode), inode,
4682                                    APP_BUFFERED_READ_IO, ret);
4683 
4684         if (trace_f2fs_dataread_end_enabled())
4685                 trace_f2fs_dataread_end(inode, pos, ret);
4686         return ret;
4687 }
4688 
4689 static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
4690 {
4691         struct file *file = iocb->ki_filp;
4692         struct inode *inode = file_inode(file);
4693         ssize_t count;
4694         int err;
4695 
4696         if (IS_IMMUTABLE(inode))
4697                 return -EPERM;
4698 
4699         if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
4700                 return -EPERM;
4701 
4702         count = generic_write_checks(iocb, from);
4703         if (count <= 0)
4704                 return count;
4705 
4706         err = file_modified(file);
4707         if (err)
4708                 return err;
4709         return count;
4710 }
4711 
4712 /*
4713  * Preallocate blocks for a write request, if it is possible and helpful to do
4714  * so.  Returns a positive number if blocks may have been preallocated, 0 if no
4715  * blocks were preallocated, or a negative errno value if something went
4716  * seriously wrong.  Also sets FI_PREALLOCATED_ALL on the inode if *all* the
4717  * requested blocks (not just some of them) have been allocated.
4718  */
4719 static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
4720                                    bool dio)
4721 {
4722         struct inode *inode = file_inode(iocb->ki_filp);
4723         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4724         const loff_t pos = iocb->ki_pos;
4725         const size_t count = iov_iter_count(iter);
4726         struct f2fs_map_blocks map = {};
4727         int flag;
4728         int ret;
4729 
4730         /* If it will be an out-of-place direct write, don't bother. */
4731         if (dio && f2fs_lfs_mode(sbi))
4732                 return 0;
4733         /*
4734          * With DIO_SKIP_HOLES, direct IO that meets a hole falls back to
4735          * buffered IO, so don't preallocate holes within the current i_size.
4736          */
4737         if (dio && i_size_read(inode) &&
4738                 (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
4739                 return 0;
4740 
4741         /* No-wait I/O can't allocate blocks. */
4742         if (iocb->ki_flags & IOCB_NOWAIT)
4743                 return 0;
4744 
4745         /* If it will be a short write, don't bother. */
4746         if (fault_in_iov_iter_readable(iter, count))
4747                 return 0;
4748 
4749         if (f2fs_has_inline_data(inode)) {
4750                 /* If the data will fit inline, don't bother. */
4751                 if (pos + count <= MAX_INLINE_DATA(inode))
4752                         return 0;
4753                 ret = f2fs_convert_inline_inode(inode);
4754                 if (ret)
4755                         return ret;
4756         }
4757 
4758         /* Do not preallocate blocks that the write covers only partially (4KB units). */
4759         map.m_lblk = F2FS_BLK_ALIGN(pos);
4760         map.m_len = F2FS_BYTES_TO_BLK(pos + count);
4761         if (map.m_len > map.m_lblk)
4762                 map.m_len -= map.m_lblk;
4763         else
4764                 return 0;
4765 
4766         map.m_may_create = true;
4767         if (dio) {
4768                 map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi,
4769                                                 inode->i_write_hint);
4770                 flag = F2FS_GET_BLOCK_PRE_DIO;
4771         } else {
4772                 map.m_seg_type = NO_CHECK_TYPE;
4773                 flag = F2FS_GET_BLOCK_PRE_AIO;
4774         }
4775 
4776         ret = f2fs_map_blocks(inode, &map, flag);
4777         /* On -ENOSPC or -EDQUOT, still report the number of allocated blocks. */
4778         if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
4779                 return ret;
4780         if (ret == 0)
4781                 set_inode_flag(inode, FI_PREALLOCATED_ALL);
4782         return map.m_len;
4783 }
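
To make the block-range arithmetic above concrete, a small host-side sketch (assuming the common 4 KiB block size, with simplified stand-ins for the F2FS_* macros): a 10000-byte write at offset 6000 fully covers only block 2 (bytes 8192..12287), so exactly one block is preallocated.

    #include <stdio.h>

    #define BLKSIZE         4096UL
    #define BYTES_TO_BLK(b) ((b) / BLKSIZE)
    #define BLK_ALIGN(b)    BYTES_TO_BLK((b) + BLKSIZE - 1)     /* round up */

    int main(void)
    {
            unsigned long pos = 6000, count = 10000;
            unsigned long lblk = BLK_ALIGN(pos);            /* 2: first fully covered block */
            unsigned long len = BYTES_TO_BLK(pos + count);  /* 3: block past the last fully covered one */

            if (len > lblk)
                    len -= lblk;    /* 1 block (bytes 8192..12287) gets preallocated */
            else
                    len = 0;        /* the write touches only partial blocks */
            printf("preallocate %lu block(s) from block %lu\n", len, lblk);
            return 0;
    }
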
4784 
4785 static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
4786                                         struct iov_iter *from)
4787 {
4788         struct file *file = iocb->ki_filp;
4789         struct inode *inode = file_inode(file);
4790         ssize_t ret;
4791 
4792         if (iocb->ki_flags & IOCB_NOWAIT)
4793                 return -EOPNOTSUPP;
4794 
4795         ret = generic_perform_write(iocb, from);
4796 
4797         if (ret > 0) {
4798                 f2fs_update_iostat(F2FS_I_SB(inode), inode,
4799                                                 APP_BUFFERED_IO, ret);
4800         }
4801         return ret;
4802 }
4803 
4804 static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
4805                                  unsigned int flags)
4806 {
4807         struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
4808 
4809         dec_page_count(sbi, F2FS_DIO_WRITE);
4810         if (error)
4811                 return error;
4812         f2fs_update_time(sbi, REQ_TIME);
4813         f2fs_update_iostat(sbi, NULL, APP_DIRECT_IO, size);
4814         return 0;
4815 }
4816 
4817 static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
4818                                         struct bio *bio, loff_t file_offset)
4819 {
4820         struct inode *inode = iter->inode;
4821         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4822         int seg_type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint);
4823         enum temp_type temp = f2fs_get_segment_temp(seg_type);
4824 
4825         bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp);
4826         submit_bio(bio);
4827 }
4828 
4829 static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
4830         .end_io         = f2fs_dio_write_end_io,
4831         .submit_io      = f2fs_dio_write_submit_io,
4832 };
4833 
4834 static void f2fs_flush_buffered_write(struct address_space *mapping,
4835                                       loff_t start_pos, loff_t end_pos)
4836 {
4837         int ret;
4838 
4839         ret = filemap_write_and_wait_range(mapping, start_pos, end_pos);
4840         if (ret < 0)
4841                 return;
4842         invalidate_mapping_pages(mapping,
4843                                  start_pos >> PAGE_SHIFT,
4844                                  end_pos >> PAGE_SHIFT);
4845 }
4846 
4847 static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
4848                                    bool *may_need_sync)
4849 {
4850         struct file *file = iocb->ki_filp;
4851         struct inode *inode = file_inode(file);
4852         struct f2fs_inode_info *fi = F2FS_I(inode);
4853         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4854         const bool do_opu = f2fs_lfs_mode(sbi);
4855         const loff_t pos = iocb->ki_pos;
4856         const ssize_t count = iov_iter_count(from);
4857         unsigned int dio_flags;
4858         struct iomap_dio *dio;
4859         ssize_t ret;
4860 
4861         trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
4862 
4863         if (iocb->ki_flags & IOCB_NOWAIT) {
4864                 /* f2fs_convert_inline_inode() and block allocation can block */
4865                 if (f2fs_has_inline_data(inode) ||
4866                     !f2fs_overwrite_io(inode, pos, count)) {
4867                         ret = -EAGAIN;
4868                         goto out;
4869                 }
4870 
4871                 if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
4872                         ret = -EAGAIN;
4873                         goto out;
4874                 }
4875                 if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
4876                         f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
4877                         ret = -EAGAIN;
4878                         goto out;
4879                 }
4880         } else {
4881                 ret = f2fs_convert_inline_inode(inode);
4882                 if (ret)
4883                         goto out;
4884 
4885                 f2fs_down_read(&fi->i_gc_rwsem[WRITE]);
4886                 if (do_opu)
4887                         f2fs_down_read(&fi->i_gc_rwsem[READ]);
4888         }
4889 
4890         /*
4891          * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
4892          * the higher-level function iomap_dio_rw() in order to ensure that the
4893          * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
4894          */
4895         inc_page_count(sbi, F2FS_DIO_WRITE);
4896         dio_flags = 0;
4897         if (pos + count > inode->i_size)
4898                 dio_flags |= IOMAP_DIO_FORCE_WAIT;
4899         dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
4900                              &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
4901         if (IS_ERR_OR_NULL(dio)) {
4902                 ret = PTR_ERR_OR_ZERO(dio);
4903                 if (ret == -ENOTBLK)
4904                         ret = 0;
4905                 if (ret != -EIOCBQUEUED)
4906                         dec_page_count(sbi, F2FS_DIO_WRITE);
4907         } else {
4908                 ret = iomap_dio_complete(dio);
4909         }
4910 
4911         if (do_opu)
4912                 f2fs_up_read(&fi->i_gc_rwsem[READ]);
4913         f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
4914 
4915         if (ret < 0)
4916                 goto out;
4917         if (pos + ret > inode->i_size)
4918                 f2fs_i_size_write(inode, pos + ret);
4919         if (!do_opu)
4920                 set_inode_flag(inode, FI_UPDATE_WRITE);
4921 
4922         if (iov_iter_count(from)) {
4923                 ssize_t ret2;
4924                 loff_t bufio_start_pos = iocb->ki_pos;
4925 
4926                 /*
4927                  * The direct write was partial, so we need to fall back to a
4928                  * buffered write for the remainder.
4929                  */
4930 
4931                 ret2 = f2fs_buffered_write_iter(iocb, from);
4932                 if (iov_iter_count(from))
4933                         f2fs_write_failed(inode, iocb->ki_pos);
4934                 if (ret2 < 0)
4935                         goto out;
4936 
4937                 /*
4938                  * Ensure that the pagecache pages are written to disk and
4939                  * invalidated to preserve the expected O_DIRECT semantics.
4940                  */
4941                 if (ret2 > 0) {
4942                         loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
4943 
4944                         ret += ret2;
4945 
4946                         f2fs_flush_buffered_write(file->f_mapping,
4947                                                   bufio_start_pos,
4948                                                   bufio_end_pos);
4949                 }
4950         } else {
4951                 /* iomap_dio_rw() already handled the generic_write_sync(). */
4952                 *may_need_sync = false;
4953         }
4954 out:
4955         trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
4956         return ret;
4957 }
4958 
4959 static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
4960 {
4961         struct inode *inode = file_inode(iocb->ki_filp);
4962         const loff_t orig_pos = iocb->ki_pos;
4963         const size_t orig_count = iov_iter_count(from);
4964         loff_t target_size;
4965         bool dio;
4966         bool may_need_sync = true;
4967         int preallocated;
4968         const loff_t pos = iocb->ki_pos;
4969         const ssize_t count = iov_iter_count(from);
4970         ssize_t ret;
4971 
4972         if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
4973                 ret = -EIO;
4974                 goto out;
4975         }
4976 
4977         if (!f2fs_is_compress_backend_ready(inode)) {
4978                 ret = -EOPNOTSUPP;
4979                 goto out;
4980         }
4981 
4982         if (iocb->ki_flags & IOCB_NOWAIT) {
4983                 if (!inode_trylock(inode)) {
4984                         ret = -EAGAIN;
4985                         goto out;
4986                 }
4987         } else {
4988                 inode_lock(inode);
4989         }
4990 
4991         if (f2fs_is_pinned_file(inode) &&
4992             !f2fs_overwrite_io(inode, pos, count)) {
4993                 ret = -EIO;
4994                 goto out_unlock;
4995         }
4996 
4997         ret = f2fs_write_checks(iocb, from);
4998         if (ret <= 0)
4999                 goto out_unlock;
5000 
5001         /* Determine whether we will do a direct write or a buffered write. */
5002         dio = f2fs_should_use_dio(inode, iocb, from);
5003 
5004         /* Possibly preallocate the blocks for the write. */
5005         target_size = iocb->ki_pos + iov_iter_count(from);
5006         preallocated = f2fs_preallocate_blocks(iocb, from, dio);
5007         if (preallocated < 0) {
5008                 ret = preallocated;
5009         } else {
5010                 if (trace_f2fs_datawrite_start_enabled())
5011                         f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos,
5012                                                 orig_count, WRITE);
5013 
5014                 /* Do the actual write. */
5015                 ret = dio ?
5016                         f2fs_dio_write_iter(iocb, from, &may_need_sync) :
5017                         f2fs_buffered_write_iter(iocb, from);
5018 
5019                 if (trace_f2fs_datawrite_end_enabled())
5020                         trace_f2fs_datawrite_end(inode, orig_pos, ret);
5021         }
5022 
5023         /* Don't leave any preallocated blocks around past i_size. */
5024         if (preallocated && i_size_read(inode) < target_size) {
5025                 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
5026                 filemap_invalidate_lock(inode->i_mapping);
5027                 if (!f2fs_truncate(inode))
5028                         file_dont_truncate(inode);
5029                 filemap_invalidate_unlock(inode->i_mapping);
5030                 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
5031         } else {
5032                 file_dont_truncate(inode);
5033         }
5034 
5035         clear_inode_flag(inode, FI_PREALLOCATED_ALL);
5036 out_unlock:
5037         inode_unlock(inode);
5038 out:
5039         trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
5040 
5041         if (ret > 0 && may_need_sync)
5042                 ret = generic_write_sync(iocb, ret);
5043 
5044         /* If buffered IO was forced, flush and drop the data from the
5045          * page cache to preserve O_DIRECT semantics.
5046          */
5047         if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT))
5048                 f2fs_flush_buffered_write(iocb->ki_filp->f_mapping,
5049                                           orig_pos,
5050                                           orig_pos + ret - 1);
5051 
5052         return ret;
5053 }
5054 
5055 static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
5056                 int advice)
5057 {
5058         struct address_space *mapping;
5059         struct backing_dev_info *bdi;
5060         struct inode *inode = file_inode(filp);
5061         int err;
5062 
5063         if (advice == POSIX_FADV_SEQUENTIAL) {
5064                 if (S_ISFIFO(inode->i_mode))
5065                         return -ESPIPE;
5066 
5067                 mapping = filp->f_mapping;
5068                 if (!mapping || len < 0)
5069                         return -EINVAL;
5070 
5071                 bdi = inode_to_bdi(mapping->host);
5072                 filp->f_ra.ra_pages = bdi->ra_pages *
5073                         F2FS_I_SB(inode)->seq_file_ra_mul;
5074                 spin_lock(&filp->f_lock);
5075                 filp->f_mode &= ~FMODE_RANDOM;
5076                 spin_unlock(&filp->f_lock);
5077                 return 0;
5078         } else if (advice == POSIX_FADV_WILLNEED && offset == 0) {
5079                 /* Load extent cache at the first readahead. */
5080                 f2fs_precache_extents(inode);
5081         }
5082 
5083         err = generic_fadvise(filp, offset, len, advice);
5084         if (!err && advice == POSIX_FADV_DONTNEED &&
5085                 test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
5086                 f2fs_compressed_file(inode))
5087                 f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
5088 
5089         return err;
5090 }
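
A hedged userspace example of what reaches the POSIX_FADV_SEQUENTIAL branch above; the path is illustrative, and seq_file_ra_mul is the per-filesystem sysfs tunable this code reads:

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/mnt/f2fs/trace.log", O_RDONLY);  /* illustrative path */

            if (fd < 0)
                    return 1;
            /*
             * len == 0 means "to end of file".  On f2fs this scales the
             * file's readahead window to bdi->ra_pages * seq_file_ra_mul
             * and clears FMODE_RANDOM, per f2fs_file_fadvise() above.
             */
            posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
            /* ... long sequential reads benefit from the larger window ... */
            close(fd);
            return 0;
    }
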
5091 
5092 #ifdef CONFIG_COMPAT
5093 struct compat_f2fs_gc_range {
5094         u32 sync;
5095         compat_u64 start;
5096         compat_u64 len;
5097 };
5098 #define F2FS_IOC32_GARBAGE_COLLECT_RANGE        _IOW(F2FS_IOCTL_MAGIC, 11,\
5099                                                 struct compat_f2fs_gc_range)
5100 
5101 static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg)
5102 {
5103         struct compat_f2fs_gc_range __user *urange;
5104         struct f2fs_gc_range range;
5105         int err;
5106 
5107         urange = compat_ptr(arg);
5108         err = get_user(range.sync, &urange->sync);
5109         err |= get_user(range.start, &urange->start);
5110         err |= get_user(range.len, &urange->len);
5111         if (err)
5112                 return -EFAULT;
5113 
5114         return __f2fs_ioc_gc_range(file, &range);
5115 }
5116 
5117 struct compat_f2fs_move_range {
5118         u32 dst_fd;
5119         compat_u64 pos_in;
5120         compat_u64 pos_out;
5121         compat_u64 len;
5122 };
5123 #define F2FS_IOC32_MOVE_RANGE           _IOWR(F2FS_IOCTL_MAGIC, 9,      \
5124                                         struct compat_f2fs_move_range)
5125 
5126 static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg)
5127 {
5128         struct compat_f2fs_move_range __user *urange;
5129         struct f2fs_move_range range;
5130         int err;
5131 
5132         urange = compat_ptr(arg);
5133         err = get_user(range.dst_fd, &urange->dst_fd);
5134         err |= get_user(range.pos_in, &urange->pos_in);
5135         err |= get_user(range.pos_out, &urange->pos_out);
5136         err |= get_user(range.len, &urange->len);
5137         if (err)
5138                 return -EFAULT;
5139 
5140         return __f2fs_ioc_move_range(file, &range);
5141 }
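
The field-by-field get_user() copies above are needed because the 32-bit and native layouts really differ: compat_u64 is only 4-byte aligned, so the compat struct loses the padding after dst_fd and shrinks, which in turn changes the sizeof() encoded into the _IOWR() command number (hence F2FS_IOC32_MOVE_RANGE vs F2FS_IOC_MOVE_RANGE). A hedged host-side illustration for a 64-bit target, using stand-in struct names:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t compat_u64_t __attribute__((aligned(4)));  /* mirrors compat_u64 */

    struct native_move_range {          /* mirrors struct f2fs_move_range */
            uint32_t dst_fd;
            uint64_t pos_in;            /* 8-byte aligned: 4 padding bytes precede it */
            uint64_t pos_out;
            uint64_t len;
    };

    struct compat_move_range {          /* mirrors struct compat_f2fs_move_range */
            uint32_t dst_fd;
            compat_u64_t pos_in;        /* 4-byte aligned: packs right after dst_fd */
            compat_u64_t pos_out;
            compat_u64_t len;
    };

    int main(void)
    {
            /* Typically prints 32 vs 28: the kernel cannot just memcpy. */
            printf("native=%zu compat=%zu\n",
                   sizeof(struct native_move_range),
                   sizeof(struct compat_move_range));
            return 0;
    }
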
5142 
5143 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
5144 {
5145         if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
5146                 return -EIO;
5147         if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file))))
5148                 return -ENOSPC;
5149 
5150         switch (cmd) {
5151         case FS_IOC32_GETVERSION:
5152                 cmd = FS_IOC_GETVERSION;
5153                 break;
5154         case F2FS_IOC32_GARBAGE_COLLECT_RANGE:
5155                 return f2fs_compat_ioc_gc_range(file, arg);
5156         case F2FS_IOC32_MOVE_RANGE:
5157                 return f2fs_compat_ioc_move_range(file, arg);
5158         case F2FS_IOC_START_ATOMIC_WRITE:
5159         case F2FS_IOC_START_ATOMIC_REPLACE:
5160         case F2FS_IOC_COMMIT_ATOMIC_WRITE:
5161         case F2FS_IOC_START_VOLATILE_WRITE:
5162         case F2FS_IOC_RELEASE_VOLATILE_WRITE:
5163         case F2FS_IOC_ABORT_ATOMIC_WRITE:
5164         case F2FS_IOC_SHUTDOWN:
5165         case FITRIM:
5166         case FS_IOC_SET_ENCRYPTION_POLICY:
5167         case FS_IOC_GET_ENCRYPTION_PWSALT:
5168         case FS_IOC_GET_ENCRYPTION_POLICY:
5169         case FS_IOC_GET_ENCRYPTION_POLICY_EX:
5170         case FS_IOC_ADD_ENCRYPTION_KEY:
5171         case FS_IOC_REMOVE_ENCRYPTION_KEY:
5172         case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
5173         case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
5174         case FS_IOC_GET_ENCRYPTION_NONCE:
5175         case F2FS_IOC_GARBAGE_COLLECT:
5176         case F2FS_IOC_WRITE_CHECKPOINT:
5177         case F2FS_IOC_DEFRAGMENT:
5178         case F2FS_IOC_FLUSH_DEVICE:
5179         case F2FS_IOC_GET_FEATURES:
5180         case F2FS_IOC_GET_PIN_FILE:
5181         case F2FS_IOC_SET_PIN_FILE:
5182         case F2FS_IOC_PRECACHE_EXTENTS:
5183         case F2FS_IOC_RESIZE_FS:
5184         case FS_IOC_ENABLE_VERITY:
5185         case FS_IOC_MEASURE_VERITY:
5186         case FS_IOC_READ_VERITY_METADATA:
5187         case FS_IOC_GETFSLABEL:
5188         case FS_IOC_SETFSLABEL:
5189         case F2FS_IOC_GET_COMPRESS_BLOCKS:
5190         case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
5191         case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
5192         case F2FS_IOC_SEC_TRIM_FILE:
5193         case F2FS_IOC_GET_COMPRESS_OPTION:
5194         case F2FS_IOC_SET_COMPRESS_OPTION:
5195         case F2FS_IOC_DECOMPRESS_FILE:
5196         case F2FS_IOC_COMPRESS_FILE:
5197                 break;
5198         default:
5199                 return -ENOIOCTLCMD;
5200         }
5201         return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
5202 }
5203 #endif
5204 
5205 const struct file_operations f2fs_file_operations = {
5206         .llseek         = f2fs_llseek,
5207         .read_iter      = f2fs_file_read_iter,
5208         .write_iter     = f2fs_file_write_iter,
5209         .iopoll         = iocb_bio_iopoll,
5210         .open           = f2fs_file_open,
5211         .release        = f2fs_release_file,
5212         .mmap           = f2fs_file_mmap,
5213         .flush          = f2fs_file_flush,
5214         .fsync          = f2fs_sync_file,
5215         .fallocate      = f2fs_fallocate,
5216         .unlocked_ioctl = f2fs_ioctl,
5217 #ifdef CONFIG_COMPAT
5218         .compat_ioctl   = f2fs_compat_ioctl,
5219 #endif
5220         .splice_read    = f2fs_file_splice_read,
5221         .splice_write   = iter_file_splice_write,
5222         .fadvise        = f2fs_file_fadvise,
5223         .fop_flags      = FOP_BUFFER_RASYNC,
5224 };
5225 
