/*
 * Resizable virtual memory filesystem for Linux.
 *
 * Copyright (C) 2000 Linus Torvalds.
 *		 2000 Transmeta Corp.
 *		 2000-2001 Christoph Rohland
 *		 2000-2001 SAP AG
 *		 2002 Red Hat Inc.
 * Copyright (C) 2002-2003 Hugh Dickins.
 * Copyright (C) 2002-2003 VERITAS Software Corporation.
 *
 * This file is released under the GPL.
 */

/*
 * This virtual memory filesystem is heavily based on the ramfs. It
 * extends ramfs by the ability to use swap and honor resource limits
 * which makes it a completely usable filesystem.
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/smp_lock.h>

#include <asm/uaccess.h>
#include <asm/div64.h>

/* This magic number is used in glibc for posix shared memory */
#define TMPFS_MAGIC	0x01021994

#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
#define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)

#define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
#define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)

#define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)

/* info->flags needs VM_flags to handle pagein/truncate race efficiently */
#define SHMEM_PAGEIN	 VM_READ
#define SHMEM_TRUNCATE	 VM_WRITE

/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20

#define SHMEM_SB(sb) (&sb->u.shmem_sb)
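/*
 * Illustrative sketch, not part of the original file: the limits above depend
 * on the page size and on sizeof(unsigned long).  Assuming 4096-byte pages,
 * 4-byte longs and SHMEM_NR_DIRECT == 16, SHMEM_MAX_INDEX works out to
 * 16 + (1048576/2) * 1025 = 537395216 pages, i.e. SHMEM_MAX_BYTES of roughly
 * 2TB per file.  Kept under #if 0 so it is never built.
 */
#if 0
static unsigned long long shmem_example_max_bytes(void)
{
	unsigned long long entries = 4096 / 4;		/* ENTRIES_PER_PAGE */
	unsigned long long pagepage = entries * entries;/* ENTRIES_PER_PAGEPAGE */
	unsigned long long max_index = 16 + (pagepage / 2) * (entries + 1);

	return max_index << 12;				/* ~2TB with 4K pages */
}
#endif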
/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
enum sgp_type {
	SGP_READ,	/* don't exceed i_size, don't allocate page */
	SGP_CACHE,	/* don't exceed i_size, may allocate page */
	SGP_WRITE,	/* may exceed i_size, may allocate page */
};

static int shmem_getpage(struct inode *inode, unsigned long idx,
			 struct page **pagep, enum sgp_type sgp);

static struct super_operations shmem_ops;
static struct address_space_operations shmem_aops;
static struct file_operations shmem_file_operations;
static struct inode_operations shmem_inode_operations;
static struct inode_operations shmem_dir_inode_operations;
static struct vm_operations_struct shmem_vm_ops;

LIST_HEAD(shmem_inodes);
static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;

static void shmem_free_block(struct inode *inode)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	spin_lock(&sbinfo->stat_lock);
	sbinfo->free_blocks++;
	inode->i_blocks -= BLOCKS_PER_PAGE;
	spin_unlock(&sbinfo->stat_lock);
}

static void shmem_removepage(struct page *page)
{
	if (!PageLaunder(page) && !PageError(page))
		shmem_free_block(page->mapping->host);
}

/*
 * shmem_swp_entry - find the swap vector position in the info structure
 *
 * @info:  info structure for the inode
 * @index: index of the page to find
 * @page:  optional page to add to the structure. Has to be preset to
 *         all zeros
 *
 * If there is no space allocated yet it will return NULL when
 * page is 0, else it will use the page for the needed block,
 * setting it to 0 on return to indicate that it has been used.
 *
 * The swap vector is organized the following way:
 *
 * There are SHMEM_NR_DIRECT entries directly stored in the
 * shmem_inode_info structure. So small files do not need an additional
 * allocation.
 *
 * For pages with index > SHMEM_NR_DIRECT there is the pointer
 * i_indirect which points to a page which holds in the first half
 * doubly indirect blocks, in the second half triple indirect blocks:
 *
 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
 * following layout (for SHMEM_NR_DIRECT == 16):
 *
 * i_indirect -> dir --> 16-19
 *            |      +-> 20-23
 *            |
 *            +-->dir2 --> 24-27
 *            |        +-> 28-31
 *            |        +-> 32-35
 *            |        +-> 36-39
 *            |
 *            +-->dir3 --> 40-43
 *                     +-> 44-47
 *                     +-> 48-51
 *                     +-> 52-55
 */
static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, unsigned long *page)
{
	unsigned long offset;
	void **dir;

	if (index < SHMEM_NR_DIRECT)
		return info->i_direct+index;
	if (!info->i_indirect) {
		if (page) {
			info->i_indirect = (void **) *page;
			*page = 0;
		}
		return NULL;			/* need another page */
	}

	index -= SHMEM_NR_DIRECT;
	offset = index % ENTRIES_PER_PAGE;
	index /= ENTRIES_PER_PAGE;
	dir = info->i_indirect;

	if (index >= ENTRIES_PER_PAGE/2) {
		index -= ENTRIES_PER_PAGE/2;
		dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
		index %= ENTRIES_PER_PAGE;
		if (!*dir) {
			if (page) {
				*dir = (void *) *page;
				*page = 0;
			}
			return NULL;		/* need another page */
		}
		dir = (void **) *dir;
	}

	dir += index;
	if (!*dir) {
		if (!page || !*page)
			return NULL;		/* need a page */
		*dir = (void *) *page;
		*page = 0;
	}
	return (swp_entry_t *) *dir + offset;
}
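/*
 * Illustrative sketch, not part of the original file: the lookup above,
 * written out for the toy values of the diagram (ENTRIES_PER_PAGE == 4,
 * SHMEM_NR_DIRECT == 16).  Index 26, for example, lands in dir2's first
 * leaf page at offset 2.  Kept under #if 0 so it is never built.
 */
#if 0
static void shmem_example_locate(unsigned long index)
{
	const unsigned long nr_direct = 16, per_page = 4;
	unsigned long offset, leaf;

	if (index < nr_direct) {
		printk("i_direct[%lu]\n", index);
		return;
	}
	index -= nr_direct;
	offset = index % per_page;	/* slot within the leaf page */
	leaf = index / per_page;	/* which leaf page of entries */
	if (leaf < per_page/2)		/* doubly indirect half */
		printk("i_indirect slot %lu, offset %lu\n", leaf, offset);
	else				/* triply indirect half */
		printk("dir%lu, leaf %lu, offset %lu\n",
		       2 + (leaf - per_page/2) / per_page,
		       (leaf - per_page/2) % per_page, offset);
}
#endif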
/*
 * shmem_swp_alloc - get the position of the swap entry for the page.
 *                   If it does not exist allocate the entry.
 *
 * @info:	info structure for the inode
 * @index:	index of the page to find
 * @sgp:	check and recheck i_size? skip allocation?
 */
static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
{
	struct inode *inode = info->inode;
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	unsigned long page = 0;
	swp_entry_t *entry;
	static const swp_entry_t unswapped = {0};

	if (sgp != SGP_WRITE &&
	    ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size)
		return ERR_PTR(-EINVAL);

	while (!(entry = shmem_swp_entry(info, index, &page))) {
		if (sgp == SGP_READ)
			return (swp_entry_t *) &unswapped;
		/*
		 * Test free_blocks against 1 not 0, since we have 1 data
		 * page (and perhaps indirect index pages) yet to allocate:
		 * a waste to allocate index if we cannot allocate data.
		 */
		spin_lock(&sbinfo->stat_lock);
		if (sbinfo->free_blocks <= 1) {
			spin_unlock(&sbinfo->stat_lock);
			return ERR_PTR(-ENOSPC);
		}
		sbinfo->free_blocks--;
		inode->i_blocks += BLOCKS_PER_PAGE;
		spin_unlock(&sbinfo->stat_lock);

		spin_unlock(&info->lock);
		page = get_zeroed_page(GFP_USER);
		spin_lock(&info->lock);

		if (!page) {
			shmem_free_block(inode);
			return ERR_PTR(-ENOMEM);
		}
		if (sgp != SGP_WRITE &&
		    ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) {
			entry = ERR_PTR(-EINVAL);
			break;
		}
		if (info->next_index <= index)
			info->next_index = index + 1;
	}
	if (page) {
		/* another task gave its page, or truncated the file */
		shmem_free_block(inode);
		free_page(page);
	}
	if (info->next_index <= index && !IS_ERR(entry))
		info->next_index = index + 1;
	return entry;
}

/*
 * shmem_free_swp - free some swap entries in a directory
 *
 * @dir:   pointer to the directory
 * @edir:  pointer after last entry of the directory
 */
static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
{
	swp_entry_t *ptr;
	int freed = 0;

	for (ptr = dir; ptr < edir; ptr++) {
		if (ptr->val) {
			free_swap_and_cache(*ptr);
			*ptr = (swp_entry_t){0};
			freed++;
		}
	}
	return freed;
}
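/*
 * Illustrative sketch, not part of the original file: the calling convention
 * shmem_swp_alloc() expects.  The caller holds info->lock, must allow for the
 * lock having been dropped and retaken while an index page was allocated, and
 * checks IS_ERR() before dereferencing.  This mirrors how shmem_getpage() is
 * expected to use it; the helper name here is made up.  Never built.
 */
#if 0
static int shmem_example_lookup(struct shmem_inode_info *info,
				unsigned long index, swp_entry_t *result)
{
	swp_entry_t *entry;

	spin_lock(&info->lock);
	entry = shmem_swp_alloc(info, index, SGP_CACHE);
	if (IS_ERR(entry)) {
		spin_unlock(&info->lock);
		return PTR_ERR(entry);
	}
	*result = *entry;	/* may be a null entry: val == 0 */
	spin_unlock(&info->lock);
	return 0;
}
#endif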
/*
 * shmem_truncate_direct - free the swap entries of a whole doubly
 *                         indirect block
 *
 * @info:	the info structure of the inode
 * @dir:	pointer to the pointer to the block
 * @start:	offset to start from (in pages)
 * @len:	how many pages are stored in this block
 */
static inline unsigned long
shmem_truncate_direct(struct shmem_inode_info *info, swp_entry_t ***dir, unsigned long start, unsigned long len)
{
	swp_entry_t **last, **ptr;
	unsigned long off, freed_swp, freed = 0;

	last = *dir + (len + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE;
	off = start % ENTRIES_PER_PAGE;

	for (ptr = *dir + start/ENTRIES_PER_PAGE; ptr < last; ptr++, off = 0) {
		if (!*ptr)
			continue;

		if (info->swapped) {
			freed_swp = shmem_free_swp(*ptr + off,
						*ptr + ENTRIES_PER_PAGE);
			info->swapped -= freed_swp;
			freed += freed_swp;
		}

		if (!off) {
			freed++;
			free_page((unsigned long) *ptr);
			*ptr = 0;
		}
	}

	if (!start) {
		freed++;
		free_page((unsigned long) *dir);
		*dir = 0;
	}
	return freed;
}

/*
 * shmem_truncate_indirect - truncate an inode
 *
 * @info:  the info structure of the inode
 * @index: the index to truncate
 *
 * This function locates the last doubly indirect block and then calls
 * shmem_truncate_direct to do the real work.
 */
static inline unsigned long
shmem_truncate_indirect(struct shmem_inode_info *info, unsigned long index)
{
	swp_entry_t ***base;
	unsigned long baseidx, start;
	unsigned long len = info->next_index;
	unsigned long freed;

	if (len <= SHMEM_NR_DIRECT) {
		info->next_index = index;
		if (!info->swapped)
			return 0;
		freed = shmem_free_swp(info->i_direct + index,
					info->i_direct + len);
		info->swapped -= freed;
		return freed;
	}

	if (len <= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT) {
		len -= SHMEM_NR_DIRECT;
		base = (swp_entry_t ***) &info->i_indirect;
		baseidx = SHMEM_NR_DIRECT;
	} else {
		len -= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT;
		BUG_ON(len > ENTRIES_PER_PAGEPAGE*ENTRIES_PER_PAGE/2);
		baseidx = len - 1;
		baseidx -= baseidx % ENTRIES_PER_PAGEPAGE;
		base = (swp_entry_t ***) info->i_indirect +
			ENTRIES_PER_PAGE/2 + baseidx/ENTRIES_PER_PAGEPAGE;
		len -= baseidx;
		baseidx += ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT;
	}

	if (index > baseidx) {
		info->next_index = index;
		start = index - baseidx;
	} else {
		info->next_index = baseidx;
		start = 0;
	}
	return *base? shmem_truncate_direct(info, base, start, len): 0;
}
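/*
 * Worked example, not part of the original file, using the toy values of the
 * layout diagram above (ENTRIES_PER_PAGE == 4, ENTRIES_PER_PAGEPAGE == 16,
 * SHMEM_NR_DIRECT == 16): with next_index == 50 and index == 42, len exceeds
 * ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT == 24, so the else branch picks
 * baseidx == 40 and base == the dir3 slot; len becomes 10 (pages 40-49), and
 * since index > baseidx, shmem_truncate_direct() is called with start == 2,
 * freeing entries 42-49 while keeping dir3 and its first leaf page.
 */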
static void shmem_truncate(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	unsigned long freed = 0;
	unsigned long index;

	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (index >= info->next_index)
		return;

	spin_lock(&info->lock);
	while (index < info->next_index)
		freed += shmem_truncate_indirect(info, index);
	BUG_ON(info->swapped > info->next_index);

	if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
		/*
		 * Call truncate_inode_pages again: racing shmem_unuse_inode
		 * may have swizzled a page in from swap since vmtruncate or
		 * generic_delete_inode did it, before we lowered next_index.
		 * Also, though shmem_getpage checks i_size before adding to
		 * cache, no recheck after: so fix the narrow window there too.
		 */
		info->flags |= SHMEM_TRUNCATE;
		spin_unlock(&info->lock);
		truncate_inode_pages(inode->i_mapping, inode->i_size);
		spin_lock(&info->lock);
		info->flags &= ~SHMEM_TRUNCATE;
	}

	spin_unlock(&info->lock);
	spin_lock(&sbinfo->stat_lock);
	sbinfo->free_blocks += freed;
	inode->i_blocks -= freed*BLOCKS_PER_PAGE;
	spin_unlock(&sbinfo->stat_lock);
}

static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	struct page *page = NULL;
	int error;

	if (attr->ia_valid & ATTR_SIZE) {
		if (attr->ia_size < inode->i_size) {
			/*
			 * If truncating down to a partial page, then
			 * if that page is already allocated, hold it
			 * in memory until the truncation is over, so
			 * truncate_partial_page cannot miss it if it
			 * were assigned to swap.
			 */
			if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
				(void) shmem_getpage(inode,
					attr->ia_size>>PAGE_CACHE_SHIFT,
						&page, SGP_READ);
			}
			/*
			 * Reset SHMEM_PAGEIN flag so that shmem_truncate can
			 * detect if any pages might have been added to cache
			 * after truncate_inode_pages.  But we needn't bother
			 * if it's being fully truncated to zero-length: the
			 * nrpages check is efficient enough in that case.
			 */
			if (attr->ia_size) {
				struct shmem_inode_info *info = SHMEM_I(inode);
				spin_lock(&info->lock);
				info->flags &= ~SHMEM_PAGEIN;
				spin_unlock(&info->lock);
			}
		}
	}

	error = inode_change_ok(inode, attr);
	if (!error)
		error = inode_setattr(inode, attr);
	if (page)
		page_cache_release(page);
	return error;
}

static void shmem_delete_inode(struct inode *inode)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	struct shmem_inode_info *info = SHMEM_I(inode);

	if (inode->i_op->truncate == shmem_truncate) {
		spin_lock(&shmem_ilock);
		list_del(&info->list);
		spin_unlock(&shmem_ilock);
		inode->i_size = 0;
		shmem_truncate(inode);
	}
	BUG_ON(inode->i_blocks);
	spin_lock(&sbinfo->stat_lock);
	sbinfo->free_inodes++;
	spin_unlock(&sbinfo->stat_lock);
	clear_inode(inode);
}
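/*
 * Worked example, not part of the original file: with 4096-byte pages, a
 * truncate of a 3-page file down to 0x1800 bytes leaves a partial final page,
 * so shmem_notify_change() above pins page index 1 with SGP_READ until
 * inode_setattr() has finished the truncation, and clears SHMEM_PAGEIN so
 * shmem_truncate() can detect pages swizzled back in meanwhile.
 */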
static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
{
	swp_entry_t *ptr;

	for (ptr = dir; ptr < edir; ptr++) {
		if (ptr->val == entry.val)
			return ptr - dir;
	}
	return -1;
}

static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
{
	struct inode *inode;
	struct address_space *mapping;
	swp_entry_t *ptr;
	unsigned long idx;
	int offset;

	idx = 0;
	ptr = info->i_direct;
	spin_lock(&info->lock);
	offset = info->next_index;
	if (offset > SHMEM_NR_DIRECT)
		offset = SHMEM_NR_DIRECT;
	offset = shmem_find_swp(entry, ptr, ptr + offset);
	if (offset >= 0)
		goto found;

	for (idx = SHMEM_NR_DIRECT; idx < info->next_index;
	    idx += ENTRIES_PER_PAGE) {
		ptr = shmem_swp_entry(info, idx, NULL);
		if (!ptr)
			continue;
		offset = info->next_index - idx;
		if (offset > ENTRIES_PER_PAGE)
			offset = ENTRIES_PER_PAGE;
		offset = shmem_find_swp(entry, ptr, ptr + offset);
		if (offset >= 0)
			goto found;
	}
	spin_unlock(&info->lock);
	return 0;
found:
	idx += offset;
	inode = info->inode;
	mapping = inode->i_mapping;
	delete_from_swap_cache(page);
	if (add_to_page_cache_unique(page,
			mapping, idx, page_hash(mapping, idx)) == 0) {
		info->flags |= SHMEM_PAGEIN;
		ptr[offset].val = 0;
		info->swapped--;
	} else if (add_to_swap_cache(page, entry) != 0)
		BUG();
	spin_unlock(&info->lock);
	SetPageUptodate(page);
	/*
	 * Decrement swap count even when the entry is left behind:
	 * try_to_unuse will skip over mms, then reincrement count.
	 */
	swap_free(entry);
	return 1;
}
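/*
 * Worked example, not part of the original file: with SHMEM_NR_DIRECT == 16,
 * ENTRIES_PER_PAGE == 1024 and info->next_index == 1050, the search above
 * first scans i_direct[0..15], then clamps the first indirect window to 1024
 * entries (indices 16..1039), and finally scans just 10 entries of the next
 * leaf page (indices 1040..1049), never looking past next_index.
 */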
525 int shmem_unuse(swp_entry_t entry, struct page *page) 1638 { << 1639 gfp_t allowflags = __GFP_IO | __GFP_F << 1640 gfp_t denyflags = __GFP_NOWARN | __GF << 1641 gfp_t zoneflags = limit_gfp & GFP_ZON << 1642 gfp_t result = huge_gfp & ~(allowflag << 1643 << 1644 /* Allow allocations only from the or << 1645 result |= zoneflags; << 1646 << 1647 /* << 1648 * Minimize the result gfp by taking << 1649 * and the intersection of the allow << 1650 */ << 1651 result |= (limit_gfp & denyflags); << 1652 result |= (huge_gfp & limit_gfp) & al << 1653 << 1654 return result; << 1655 } << 1656 << 1657 #ifdef CONFIG_TRANSPARENT_HUGEPAGE << 1658 unsigned long shmem_allowable_huge_orders(str << 1659 struct vm_are << 1660 loff_t write_ << 1661 { << 1662 unsigned long mask = READ_ONCE(huge_s << 1663 unsigned long within_size_orders = RE << 1664 unsigned long vm_flags = vma ? vma->v << 1665 bool global_huge; << 1666 loff_t i_size; << 1667 int order; << 1668 << 1669 if (thp_disabled_by_hw() || (vma && v << 1670 return 0; << 1671 << 1672 global_huge = shmem_huge_global_enabl << 1673 shmem << 1674 if (!vma || !vma_is_anon_shmem(vma)) << 1675 /* << 1676 * For tmpfs, we now only sup << 1677 * is enabled, otherwise fall << 1678 */ << 1679 return global_huge ? BIT(HPAG << 1680 } << 1681 << 1682 /* << 1683 * Following the 'deny' semantics of << 1684 * option off from all mounts. << 1685 */ << 1686 if (shmem_huge == SHMEM_HUGE_DENY) << 1687 return 0; << 1688 << 1689 /* << 1690 * Only allow inherit orders if the t << 1691 * means non-PMD sized THP can not ov << 1692 */ << 1693 if (shmem_huge == SHMEM_HUGE_FORCE) << 1694 return READ_ONCE(huge_shmem_o << 1695 << 1696 /* Allow mTHP that will be fully with << 1697 order = highest_order(within_size_ord << 1698 while (within_size_orders) { << 1699 index = round_up(index + 1, o << 1700 i_size = round_up(i_size_read << 1701 if (i_size >> PAGE_SHIFT >= i << 1702 mask |= within_size_o << 1703 break; << 1704 } << 1705 << 1706 order = next_order(&within_si << 1707 } << 1708 << 1709 if (vm_flags & VM_HUGEPAGE) << 1710 mask |= READ_ONCE(huge_shmem_ << 1711 << 1712 if (global_huge) << 1713 mask |= READ_ONCE(huge_shmem_ << 1714 << 1715 return THP_ORDERS_ALL_FILE_DEFAULT & << 1716 } << 1717 << 1718 static unsigned long shmem_suitable_orders(st << 1719 st << 1720 un << 1721 { 526 { 1722 struct vm_area_struct *vma = vmf ? vm !! 527 struct list_head *p; 1723 pgoff_t aligned_index; !! 528 struct shmem_inode_info *info; 1724 unsigned long pages; !! 529 int found = 0; 1725 int order; << 1726 530 1727 if (vma) { !! 531 spin_lock(&shmem_ilock); 1728 orders = thp_vma_suitable_ord !! 532 list_for_each(p, &shmem_inodes) { 1729 if (!orders) !! 533 info = list_entry(p, struct shmem_inode_info, list); 1730 return 0; << 1731 } << 1732 534 1733 /* Find the highest order that can ad !! 535 if (info->swapped && shmem_unuse_inode(info, entry, page)) { 1734 order = highest_order(orders); !! 536 /* move head to start search for next from here */ 1735 while (orders) { !! 537 list_move_tail(&shmem_inodes, &info->list); 1736 pages = 1UL << order; !! 
538 found = 1; 1737 aligned_index = round_down(in << 1738 /* << 1739 * Check for conflict before << 1740 * Conflict might be that a h << 1741 * and added to page cache by << 1742 * is already at least one sm << 1743 * Be careful to retry when a << 1744 * Elsewhere -EEXIST would be << 1745 */ << 1746 if (!xa_find(&mapping->i_page << 1747 aligned_index + << 1748 break; 539 break; 1749 order = next_order(&orders, o << 1750 } << 1751 << 1752 return orders; << 1753 } << 1754 #else << 1755 static unsigned long shmem_suitable_orders(st << 1756 st << 1757 un << 1758 { << 1759 return 0; << 1760 } << 1761 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ << 1762 << 1763 static struct folio *shmem_alloc_folio(gfp_t << 1764 struct shmem_inode_info *info << 1765 { << 1766 struct mempolicy *mpol; << 1767 pgoff_t ilx; << 1768 struct folio *folio; << 1769 << 1770 mpol = shmem_get_pgoff_policy(info, i << 1771 folio = folio_alloc_mpol(gfp, order, << 1772 mpol_cond_put(mpol); << 1773 << 1774 return folio; << 1775 } << 1776 << 1777 static struct folio *shmem_alloc_and_add_foli << 1778 gfp_t gfp, struct inode *inod << 1779 struct mm_struct *fault_mm, u << 1780 { << 1781 struct address_space *mapping = inode << 1782 struct shmem_inode_info *info = SHMEM << 1783 unsigned long suitable_orders = 0; << 1784 struct folio *folio = NULL; << 1785 long pages; << 1786 int error, order; << 1787 << 1788 if (!IS_ENABLED(CONFIG_TRANSPARENT_HU << 1789 orders = 0; << 1790 << 1791 if (orders > 0) { << 1792 suitable_orders = shmem_suita << 1793 << 1794 << 1795 order = highest_order(suitabl << 1796 while (suitable_orders) { << 1797 pages = 1UL << order; << 1798 index = round_down(in << 1799 folio = shmem_alloc_f << 1800 if (folio) << 1801 goto allocate << 1802 << 1803 if (pages == HPAGE_PM << 1804 count_vm_even << 1805 count_mthp_stat(order << 1806 order = next_order(&s << 1807 } << 1808 } else { << 1809 pages = 1; << 1810 folio = shmem_alloc_folio(gfp << 1811 } << 1812 if (!folio) << 1813 return ERR_PTR(-ENOMEM); << 1814 << 1815 allocated: << 1816 __folio_set_locked(folio); << 1817 __folio_set_swapbacked(folio); << 1818 << 1819 gfp &= GFP_RECLAIM_MASK; << 1820 error = mem_cgroup_charge(folio, faul << 1821 if (error) { << 1822 if (xa_find(&mapping->i_pages << 1823 index + pages << 1824 error = -EEXIST; << 1825 } else if (pages > 1) { << 1826 if (pages == HPAGE_PM << 1827 count_vm_even << 1828 count_vm_even << 1829 } << 1830 count_mthp_stat(folio << 1831 count_mthp_stat(folio << 1832 } << 1833 goto unlock; << 1834 } << 1835 << 1836 error = shmem_add_to_page_cache(folio << 1837 if (error) << 1838 goto unlock; << 1839 << 1840 error = shmem_inode_acct_blocks(inode << 1841 if (error) { << 1842 struct shmem_sb_info *sbinfo << 1843 long freed; << 1844 /* << 1845 * Try to reclaim some space << 1846 * large folios beyond i_size << 1847 */ << 1848 shmem_unused_huge_shrink(sbin << 1849 /* << 1850 * And do a shmem_recalc_inod << 1851 * except our folio is there << 1852 */ << 1853 spin_lock(&info->lock); << 1854 freed = pages + info->alloced << 1855 READ_ONCE(mapping->nr << 1856 if (freed > 0) << 1857 info->alloced -= free << 1858 spin_unlock(&info->lock); << 1859 if (freed > 0) << 1860 shmem_inode_unacct_bl << 1861 error = shmem_inode_acct_bloc << 1862 if (error) { << 1863 filemap_remove_folio( << 1864 goto unlock; << 1865 } 540 } 1866 } 541 } 1867 !! 542 spin_unlock(&shmem_ilock); 1868 shmem_recalc_inode(inode, pages, 0); !! 
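/*
 * Illustrative userspace sketch (not part of shmem.c): the order-selection
 * code above honours VM_HUGEPAGE on the VMA (huge_shmem_orders_madvise).
 * Assuming shmem THP is permitted (e.g. a huge=madvise mount option or the
 * /sys/kernel/mm/transparent_hugepage/shmem_enabled knob), madvise() lets a
 * mapping of a tmpfs/memfd file opt in to large folios.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	size_t len = 16UL << 20;
	int fd = memfd_create("thp-demo", 0);
	char *p;

	if (fd < 0 || ftruncate(fd, len) < 0)
		return 1;
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	/* Sets VM_HUGEPAGE, which shmem_allowable_huge_orders() consults. */
	madvise(p, len, MADV_HUGEPAGE);

	memset(p, 0, len);	/* faults may now be served by large folios */
	return 0;
}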
543 return found; 1869 folio_add_lru(folio); << 1870 return folio; << 1871 << 1872 unlock: << 1873 folio_unlock(folio); << 1874 folio_put(folio); << 1875 return ERR_PTR(error); << 1876 } 544 } 1877 545 1878 /* 546 /* 1879 * When a page is moved from swapcache to shm !! 547 * Move the page from the page cache to the swap cache. 1880 * usual swapin of shmem_get_folio_gfp(), or << 1881 * shmem_unuse_inode()), it may have been rea << 1882 * ignorance of the mapping it belongs to. I << 1883 * constraints (like the gma500 GEM driver, w << 1884 * we may need to copy to a suitable page bef << 1885 * << 1886 * In a future release, this may well be exte << 1887 * NUMA mempolicy, and applied also to anonym << 1888 * but for now it is a simple matter of zone. << 1889 */ 548 */ 1890 static bool shmem_should_replace_folio(struct !! 549 static int shmem_writepage(struct page *page) 1891 { 550 { 1892 return folio_zonenum(folio) > gfp_zon !! 551 struct shmem_inode_info *info; 1893 } !! 552 swp_entry_t *entry, swap; 1894 !! 553 struct address_space *mapping; 1895 static int shmem_replace_folio(struct folio * !! 554 unsigned long index; 1896 struct shmem_ !! 555 struct inode *inode; 1897 struct vm_are << 1898 { << 1899 struct folio *new, *old = *foliop; << 1900 swp_entry_t entry = old->swap; << 1901 struct address_space *swap_mapping = << 1902 pgoff_t swap_index = swap_cache_index << 1903 XA_STATE(xas, &swap_mapping->i_pages, << 1904 int nr_pages = folio_nr_pages(old); << 1905 int error = 0, i; << 1906 556 1907 /* !! 557 BUG_ON(!PageLocked(page)); 1908 * We have arrived here because our z !! 558 if (!PageLaunder(page)) 1909 * limit chance of success by further !! 559 goto fail; 1910 */ !! 560 1911 gfp &= ~GFP_CONSTRAINT_MASK; !! 561 mapping = page->mapping; 1912 #ifdef CONFIG_TRANSPARENT_HUGEPAGE !! 562 index = page->index; 1913 if (nr_pages > 1) { !! 563 inode = mapping->host; 1914 gfp_t huge_gfp = vma_thp_gfp_ !! 564 info = SHMEM_I(inode); >> 565 if (info->flags & VM_LOCKED) >> 566 goto fail; >> 567 getswap: >> 568 swap = get_swap_page(); >> 569 if (!swap.val) >> 570 goto fail; 1915 571 1916 gfp = limit_gfp_mask(huge_gfp !! 572 spin_lock(&info->lock); >> 573 if (index >= info->next_index) { >> 574 BUG_ON(!(info->flags & SHMEM_TRUNCATE)); >> 575 spin_unlock(&info->lock); >> 576 swap_free(swap); >> 577 goto fail; 1917 } 578 } 1918 #endif !! 579 entry = shmem_swp_entry(info, index, NULL); 1919 !! 580 BUG_ON(!entry); 1920 new = shmem_alloc_folio(gfp, folio_or !! 581 BUG_ON(entry->val); 1921 if (!new) << 1922 return -ENOMEM; << 1923 << 1924 folio_ref_add(new, nr_pages); << 1925 folio_copy(new, old); << 1926 flush_dcache_folio(new); << 1927 << 1928 __folio_set_locked(new); << 1929 __folio_set_swapbacked(new); << 1930 folio_mark_uptodate(new); << 1931 new->swap = entry; << 1932 folio_set_swapcache(new); << 1933 << 1934 /* Swap cache still stores N entries << 1935 xa_lock_irq(&swap_mapping->i_pages); << 1936 for (i = 0; i < nr_pages; i++) { << 1937 void *item = xas_load(&xas); << 1938 << 1939 if (item != old) { << 1940 error = -ENOENT; << 1941 break; << 1942 } << 1943 582 1944 xas_store(&xas, new); !! 583 /* Remove it from the page cache */ 1945 xas_next(&xas); !! 584 remove_inode_page(page); 1946 } !! 
585 page_cache_release(page); 1947 if (!error) { << 1948 mem_cgroup_replace_folio(old, << 1949 __lruvec_stat_mod_folio(new, << 1950 __lruvec_stat_mod_folio(new, << 1951 __lruvec_stat_mod_folio(old, << 1952 __lruvec_stat_mod_folio(old, << 1953 } << 1954 xa_unlock_irq(&swap_mapping->i_pages) << 1955 586 1956 if (unlikely(error)) { !! 587 /* Add it to the swap cache */ >> 588 if (add_to_swap_cache(page, swap) != 0) { 1957 /* 589 /* 1958 * Is this possible? I think !! 590 * Raced with "speculative" read_swap_cache_async. 1959 * check both the swapcache f !! 591 * Add page back to page cache, unref swap, try again. 1960 * after getting the folio lo << 1961 * Reverse old to newpage for << 1962 */ 592 */ 1963 old = new; !! 593 add_to_page_cache_locked(page, mapping, index); 1964 } else { !! 594 info->flags |= SHMEM_PAGEIN; 1965 folio_add_lru(new); !! 595 spin_unlock(&info->lock); 1966 *foliop = new; !! 596 swap_free(swap); 1967 } !! 597 goto getswap; 1968 << 1969 folio_clear_swapcache(old); << 1970 old->private = NULL; << 1971 << 1972 folio_unlock(old); << 1973 /* << 1974 * The old folio are removed from swa << 1975 * reference, as well as one temporar << 1976 * cache. << 1977 */ << 1978 folio_put_refs(old, nr_pages + 1); << 1979 return error; << 1980 } << 1981 << 1982 static void shmem_set_folio_swapin_error(stru << 1983 stru << 1984 { << 1985 struct address_space *mapping = inode << 1986 swp_entry_t swapin_error; << 1987 void *old; << 1988 int nr_pages; << 1989 << 1990 swapin_error = make_poisoned_swp_entr << 1991 old = xa_cmpxchg_irq(&mapping->i_page << 1992 swp_to_radix_ent << 1993 swp_to_radix_ent << 1994 if (old != swp_to_radix_entry(swap)) << 1995 return; << 1996 << 1997 nr_pages = folio_nr_pages(folio); << 1998 folio_wait_writeback(folio); << 1999 delete_from_swap_cache(folio); << 2000 /* << 2001 * Don't treat swapin error folio as << 2002 * won't be 0 when inode is released << 2003 * in shmem_evict_inode(). 
<< 2004 */ << 2005 shmem_recalc_inode(inode, -nr_pages, << 2006 swap_free_nr(swap, nr_pages); << 2007 } << 2008 << 2009 static int shmem_split_large_entry(struct ino << 2010 swp_entry_ << 2011 { << 2012 struct address_space *mapping = inode << 2013 XA_STATE_ORDER(xas, &mapping->i_pages << 2014 void *alloced_shadow = NULL; << 2015 int alloced_order = 0, i; << 2016 << 2017 /* Convert user data gfp flags to xar << 2018 gfp &= GFP_RECLAIM_MASK; << 2019 << 2020 for (;;) { << 2021 int order = -1, split_order = << 2022 void *old = NULL; << 2023 << 2024 xas_lock_irq(&xas); << 2025 old = xas_load(&xas); << 2026 if (!xa_is_value(old) || swp_ << 2027 xas_set_err(&xas, -EE << 2028 goto unlock; << 2029 } << 2030 << 2031 order = xas_get_order(&xas); << 2032 << 2033 /* Swap entry may have change << 2034 if (alloced_order && << 2035 (old != alloced_shadow || << 2036 xas_destroy(&xas); << 2037 alloced_order = 0; << 2038 } << 2039 << 2040 /* Try to split large swap en << 2041 if (order > 0) { << 2042 if (!alloced_order) { << 2043 split_order = << 2044 goto unlock; << 2045 } << 2046 xas_split(&xas, old, << 2047 << 2048 /* << 2049 * Re-set the swap en << 2050 * offset of the orig << 2051 */ << 2052 for (i = 0; i < 1 << << 2053 pgoff_t align << 2054 swp_entry_t t << 2055 << 2056 tmp = swp_ent << 2057 __xa_store(&m << 2058 sw << 2059 } << 2060 } << 2061 << 2062 unlock: << 2063 xas_unlock_irq(&xas); << 2064 << 2065 /* split needed, alloc here a << 2066 if (split_order) { << 2067 xas_split_alloc(&xas, << 2068 if (xas_error(&xas)) << 2069 goto error; << 2070 alloced_shadow = old; << 2071 alloced_order = split << 2072 xas_reset(&xas); << 2073 continue; << 2074 } << 2075 << 2076 if (!xas_nomem(&xas, gfp)) << 2077 break; << 2078 } 598 } 2079 599 2080 error: !! 600 *entry = swap; 2081 if (xas_error(&xas)) !! 601 info->swapped++; 2082 return xas_error(&xas); !! 602 spin_unlock(&info->lock); 2083 !! 603 SetPageUptodate(page); 2084 return alloced_order; !! 604 set_page_dirty(page); >> 605 UnlockPage(page); >> 606 return 0; >> 607 fail: >> 608 return fail_writepage(page); 2085 } 609 } 2086 610 2087 /* 611 /* 2088 * Swap in the folio pointed to by *foliop. !! 612 * shmem_getpage - either get the page from swap or allocate a new one 2089 * Caller has to make sure that *foliop conta !! 613 * 2090 * Returns 0 and the folio in foliop if succe !! 614 * If we allocate a new one we do not mark it dirty. That's up to the 2091 * error code and NULL in *foliop. !! 615 * vm. If we swap it in we mark it dirty since we also free the swap >> 616 * entry since a page cannot live in both the swap and page cache 2092 */ 617 */ 2093 static int shmem_swapin_folio(struct inode *i !! 618 static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp) 2094 struct folio **f << 2095 gfp_t gfp, struc << 2096 vm_fault_t *faul << 2097 { 619 { 2098 struct address_space *mapping = inode 620 struct address_space *mapping = inode->i_mapping; 2099 struct mm_struct *fault_mm = vma ? vm << 2100 struct shmem_inode_info *info = SHMEM 621 struct shmem_inode_info *info = SHMEM_I(inode); 2101 struct swap_info_struct *si; !! 622 struct shmem_sb_info *sbinfo; 2102 struct folio *folio = NULL; !! 623 struct page *filepage = *pagep; >> 624 struct page *swappage; >> 625 swp_entry_t *entry; 2103 swp_entry_t swap; 626 swp_entry_t swap; 2104 int error, nr_pages; !! 
627 int error = 0; 2105 << 2106 VM_BUG_ON(!*foliop || !xa_is_value(*f << 2107 swap = radix_to_swp_entry(*foliop); << 2108 *foliop = NULL; << 2109 << 2110 if (is_poisoned_swp_entry(swap)) << 2111 return -EIO; << 2112 << 2113 si = get_swap_device(swap); << 2114 if (!si) { << 2115 if (!shmem_confirm_swap(mappi << 2116 return -EEXIST; << 2117 else << 2118 return -EINVAL; << 2119 } << 2120 << 2121 /* Look it up and read it in.. */ << 2122 folio = swap_cache_get_folio(swap, NU << 2123 if (!folio) { << 2124 int split_order; << 2125 << 2126 /* Or update major stats only << 2127 if (fault_type) { << 2128 *fault_type |= VM_FAU << 2129 count_vm_event(PGMAJF << 2130 count_memcg_event_mm( << 2131 } << 2132 << 2133 /* << 2134 * Now swap device can only s << 2135 * should split the large swa << 2136 * if necessary. << 2137 */ << 2138 split_order = shmem_split_lar << 2139 if (split_order < 0) { << 2140 error = split_order; << 2141 goto failed; << 2142 } << 2143 << 2144 /* << 2145 * If the large swap entry ha << 2146 * necessary to recalculate t << 2147 * the old order alignment. << 2148 */ << 2149 if (split_order > 0) { << 2150 pgoff_t offset = inde << 2151 << 2152 swap = swp_entry(swp_ << 2153 } << 2154 << 2155 /* Here we actually start the << 2156 folio = shmem_swapin_cluster( << 2157 if (!folio) { << 2158 error = -ENOMEM; << 2159 goto failed; << 2160 } << 2161 } << 2162 628 2163 /* We have to do this with folio lock !! 629 if (idx >= SHMEM_MAX_INDEX) { 2164 folio_lock(folio); !! 630 error = -EFBIG; 2165 if (!folio_test_swapcache(folio) || << 2166 folio->swap.val != swap.val || << 2167 !shmem_confirm_swap(mapping, inde << 2168 error = -EEXIST; << 2169 goto unlock; << 2170 } << 2171 if (!folio_test_uptodate(folio)) { << 2172 error = -EIO; << 2173 goto failed; 631 goto failed; 2174 } 632 } 2175 folio_wait_writeback(folio); << 2176 nr_pages = folio_nr_pages(folio); << 2177 633 2178 /* 634 /* 2179 * Some architectures may have to res !! 635 * Normally, filepage is NULL on entry, and either found 2180 * folio after reading from swap. !! 636 * uptodate immediately, or allocated and zeroed, or read >> 637 * in under swappage, which is then assigned to filepage. >> 638 * But shmem_readpage and shmem_prepare_write pass in a locked >> 639 * filepage, which may be found not uptodate by other callers >> 640 * too, and may need to be copied from the swappage read in. 2181 */ 641 */ 2182 arch_swap_restore(folio_swap(swap, fo << 2183 << 2184 if (shmem_should_replace_folio(folio, << 2185 error = shmem_replace_folio(& << 2186 if (error) << 2187 goto failed; << 2188 } << 2189 << 2190 error = shmem_add_to_page_cache(folio << 2191 round << 2192 swp_t << 2193 if (error) << 2194 goto failed; << 2195 << 2196 shmem_recalc_inode(inode, 0, -nr_page << 2197 << 2198 if (sgp == SGP_WRITE) << 2199 folio_mark_accessed(folio); << 2200 << 2201 delete_from_swap_cache(folio); << 2202 folio_mark_dirty(folio); << 2203 swap_free_nr(swap, nr_pages); << 2204 put_swap_device(si); << 2205 << 2206 *foliop = folio; << 2207 return 0; << 2208 failed: << 2209 if (!shmem_confirm_swap(mapping, inde << 2210 error = -EEXIST; << 2211 if (error == -EIO) << 2212 shmem_set_folio_swapin_error( << 2213 unlock: << 2214 if (folio) { << 2215 folio_unlock(folio); << 2216 folio_put(folio); << 2217 } << 2218 put_swap_device(si); << 2219 << 2220 return error; << 2221 } << 2222 << 2223 /* << 2224 * shmem_get_folio_gfp - find page in cache, << 2225 * << 2226 * If we allocate a new one we do not mark it << 2227 * vm. 
If we swap it in we mark it dirty sinc << 2228 * entry since a page cannot live in both the << 2229 * << 2230 * vmf and fault_type are only supplied by sh << 2231 */ << 2232 static int shmem_get_folio_gfp(struct inode * << 2233 loff_t write_end, struct foli << 2234 gfp_t gfp, struct vm_fault *v << 2235 { << 2236 struct vm_area_struct *vma = vmf ? vm << 2237 struct mm_struct *fault_mm; << 2238 struct folio *folio; << 2239 int error; << 2240 bool alloced; << 2241 unsigned long orders = 0; << 2242 << 2243 if (WARN_ON_ONCE(!shmem_mapping(inode << 2244 return -EINVAL; << 2245 << 2246 if (index > (MAX_LFS_FILESIZE >> PAGE << 2247 return -EFBIG; << 2248 repeat: 642 repeat: 2249 if (sgp <= SGP_CACHE && !! 643 if (!filepage) 2250 ((loff_t)index << PAGE_SHIFT) >= !! 644 filepage = find_lock_page(mapping, idx); 2251 return -EINVAL; !! 645 if (filepage && Page_Uptodate(filepage)) 2252 !! 646 goto done; 2253 alloced = false; << 2254 fault_mm = vma ? vma->vm_mm : NULL; << 2255 647 2256 folio = filemap_get_entry(inode->i_ma !! 648 spin_lock(&info->lock); 2257 if (folio && vma && userfaultfd_minor !! 649 entry = shmem_swp_alloc(info, idx, sgp); 2258 if (!xa_is_value(folio)) !! 650 if (IS_ERR(entry)) { 2259 folio_put(folio); !! 651 spin_unlock(&info->lock); 2260 *fault_type = handle_userfaul !! 652 error = PTR_ERR(entry); 2261 return 0; !! 653 goto failed; 2262 } 654 } >> 655 swap = *entry; 2263 656 2264 if (xa_is_value(folio)) { !! 657 if (swap.val) { 2265 error = shmem_swapin_folio(in !! 658 /* Look it up and read it in.. */ 2266 sg !! 659 swappage = lookup_swap_cache(swap); 2267 if (error == -EEXIST) !! 660 if (!swappage) { >> 661 spin_unlock(&info->lock); >> 662 swapin_readahead(swap); >> 663 swappage = read_swap_cache_async(swap); >> 664 if (!swappage) { >> 665 spin_lock(&info->lock); >> 666 entry = shmem_swp_alloc(info, idx, sgp); >> 667 if (IS_ERR(entry)) >> 668 error = PTR_ERR(entry); >> 669 else if (entry->val == swap.val) >> 670 error = -ENOMEM; >> 671 spin_unlock(&info->lock); >> 672 if (error) >> 673 goto failed; >> 674 goto repeat; >> 675 } >> 676 wait_on_page(swappage); >> 677 page_cache_release(swappage); 2268 goto repeat; 678 goto repeat; >> 679 } 2269 680 2270 *foliop = folio; !! 681 /* We have to do this with page locked to prevent races */ 2271 return error; !! 682 if (TryLockPage(swappage)) { 2272 } !! 683 spin_unlock(&info->lock); 2273 !! 684 wait_on_page(swappage); 2274 if (folio) { !! 685 page_cache_release(swappage); 2275 folio_lock(folio); << 2276 << 2277 /* Has the folio been truncat << 2278 if (unlikely(folio->mapping ! << 2279 folio_unlock(folio); << 2280 folio_put(folio); << 2281 goto repeat; 686 goto repeat; 2282 } 687 } 2283 if (sgp == SGP_WRITE) !! 688 if (!Page_Uptodate(swappage)) { 2284 folio_mark_accessed(f !! 689 spin_unlock(&info->lock); 2285 if (folio_test_uptodate(folio !! 690 UnlockPage(swappage); 2286 goto out; !! 691 page_cache_release(swappage); 2287 /* fallocated folio */ !! 692 error = -EIO; 2288 if (sgp != SGP_READ) !! 
693 goto failed; 2289 goto clear; << 2290 folio_unlock(folio); << 2291 folio_put(folio); << 2292 } << 2293 << 2294 /* << 2295 * SGP_READ: succeed on hole, with NU << 2296 * SGP_NOALLOC: fail on hole, with NU << 2297 */ << 2298 *foliop = NULL; << 2299 if (sgp == SGP_READ) << 2300 return 0; << 2301 if (sgp == SGP_NOALLOC) << 2302 return -ENOENT; << 2303 << 2304 /* << 2305 * Fast cache lookup and swap lookup << 2306 */ << 2307 << 2308 if (vma && userfaultfd_missing(vma)) << 2309 *fault_type = handle_userfaul << 2310 return 0; << 2311 } << 2312 << 2313 /* Find hugepage orders that are allo << 2314 orders = shmem_allowable_huge_orders( << 2315 if (orders > 0) { << 2316 gfp_t huge_gfp; << 2317 << 2318 huge_gfp = vma_thp_gfp_mask(v << 2319 huge_gfp = limit_gfp_mask(hug << 2320 folio = shmem_alloc_and_add_f << 2321 inode, index, << 2322 if (!IS_ERR(folio)) { << 2323 if (folio_test_pmd_ma << 2324 count_vm_even << 2325 count_mthp_stat(folio << 2326 goto alloced; << 2327 } 694 } 2328 if (PTR_ERR(folio) == -EEXIST << 2329 goto repeat; << 2330 } << 2331 695 2332 folio = shmem_alloc_and_add_folio(vmf !! 696 delete_from_swap_cache(swappage); 2333 if (IS_ERR(folio)) { !! 697 if (filepage) { 2334 error = PTR_ERR(folio); !! 698 entry->val = 0; 2335 if (error == -EEXIST) !! 699 info->swapped--; >> 700 spin_unlock(&info->lock); >> 701 flush_page_to_ram(swappage); >> 702 copy_highpage(filepage, swappage); >> 703 UnlockPage(swappage); >> 704 page_cache_release(swappage); >> 705 flush_dcache_page(filepage); >> 706 SetPageUptodate(filepage); >> 707 SetPageDirty(filepage); >> 708 swap_free(swap); >> 709 } else if (add_to_page_cache_unique(swappage, >> 710 mapping, idx, page_hash(mapping, idx)) == 0) { >> 711 info->flags |= SHMEM_PAGEIN; >> 712 entry->val = 0; >> 713 info->swapped--; >> 714 spin_unlock(&info->lock); >> 715 filepage = swappage; >> 716 SetPageUptodate(filepage); >> 717 SetPageDirty(filepage); >> 718 swap_free(swap); >> 719 } else { >> 720 if (add_to_swap_cache(swappage, swap) != 0) >> 721 BUG(); >> 722 spin_unlock(&info->lock); >> 723 SetPageUptodate(swappage); >> 724 SetPageDirty(swappage); >> 725 UnlockPage(swappage); >> 726 page_cache_release(swappage); 2336 goto repeat; 727 goto repeat; 2337 folio = NULL; << 2338 goto unlock; << 2339 } << 2340 << 2341 alloced: << 2342 alloced = true; << 2343 if (folio_test_large(folio) && << 2344 DIV_ROUND_UP(i_size_read(inode), << 2345 folio << 2346 struct shmem_sb_info *sbinfo << 2347 struct shmem_inode_info *info << 2348 /* << 2349 * Part of the large folio is << 2350 * to shrink under memory pre << 2351 */ << 2352 spin_lock(&sbinfo->shrinklist << 2353 /* << 2354 * _careful to defend against << 2355 * ->shrink_list in shmem_unu << 2356 */ << 2357 if (list_empty_careful(&info- << 2358 list_add_tail(&info-> << 2359 &sbinfo << 2360 sbinfo->shrinklist_le << 2361 } 728 } 2362 spin_unlock(&sbinfo->shrinkli !! 729 } else if (sgp == SGP_READ && !filepage) { 2363 } !! 730 filepage = find_get_page(mapping, idx); 2364 !! 731 if (filepage && 2365 if (sgp == SGP_WRITE) !! 732 (!Page_Uptodate(filepage) || TryLockPage(filepage))) { 2366 folio_set_referenced(folio); !! 733 spin_unlock(&info->lock); 2367 /* !! 734 wait_on_page(filepage); 2368 * Let SGP_FALLOC use the SGP_WRITE o !! 735 page_cache_release(filepage); 2369 */ !! 736 filepage = NULL; 2370 if (sgp == SGP_FALLOC) !! 737 goto repeat; 2371 sgp = SGP_WRITE; !! 738 } 2372 clear: !! 739 spin_unlock(&info->lock); 2373 /* !! 740 } else { 2374 * Let SGP_WRITE caller clear ends if !! 
741 sbinfo = SHMEM_SB(inode->i_sb); 2375 * but SGP_FALLOC on a folio fallocat !! 742 spin_lock(&sbinfo->stat_lock); 2376 * it now, lest undo on failure cance !! 743 if (sbinfo->free_blocks == 0) { 2377 */ !! 744 spin_unlock(&sbinfo->stat_lock); 2378 if (sgp != SGP_WRITE && !folio_test_u !! 745 spin_unlock(&info->lock); 2379 long i, n = folio_nr_pages(fo !! 746 error = -ENOSPC; >> 747 goto failed; >> 748 } >> 749 sbinfo->free_blocks--; >> 750 inode->i_blocks += BLOCKS_PER_PAGE; >> 751 spin_unlock(&sbinfo->stat_lock); >> 752 >> 753 if (!filepage) { >> 754 spin_unlock(&info->lock); >> 755 filepage = page_cache_alloc(mapping); >> 756 if (!filepage) { >> 757 shmem_free_block(inode); >> 758 error = -ENOMEM; >> 759 goto failed; >> 760 } 2380 761 2381 for (i = 0; i < n; i++) !! 762 spin_lock(&info->lock); 2382 clear_highpage(folio_ !! 763 entry = shmem_swp_alloc(info, idx, sgp); 2383 flush_dcache_folio(folio); !! 764 if (IS_ERR(entry)) 2384 folio_mark_uptodate(folio); !! 765 error = PTR_ERR(entry); 2385 } !! 766 if (error || entry->val || >> 767 add_to_page_cache_unique(filepage, >> 768 mapping, idx, page_hash(mapping, idx)) != 0) { >> 769 spin_unlock(&info->lock); >> 770 page_cache_release(filepage); >> 771 shmem_free_block(inode); >> 772 filepage = NULL; >> 773 if (error) >> 774 goto failed; >> 775 goto repeat; >> 776 } >> 777 info->flags |= SHMEM_PAGEIN; >> 778 } 2386 779 2387 /* Perhaps the file has been truncate !! 780 spin_unlock(&info->lock); 2388 if (sgp <= SGP_CACHE && !! 781 clear_highpage(filepage); 2389 ((loff_t)index << PAGE_SHIFT) >= !! 782 flush_dcache_page(filepage); 2390 error = -EINVAL; !! 783 SetPageUptodate(filepage); 2391 goto unlock; !! 784 } >> 785 done: >> 786 if (!*pagep) { >> 787 if (filepage) >> 788 UnlockPage(filepage); >> 789 else >> 790 filepage = ZERO_PAGE(0); >> 791 *pagep = filepage; 2392 } 792 } 2393 out: !! 793 if (PageError(filepage)) 2394 *foliop = folio; !! 794 ClearPageError(filepage); 2395 return 0; 795 return 0; 2396 796 2397 /* !! 797 failed: 2398 * Error recovery. !! 798 if (filepage) { 2399 */ !! 799 if (*pagep == filepage) 2400 unlock: !! 800 SetPageError(filepage); 2401 if (alloced) !! 801 else { 2402 filemap_remove_folio(folio); !! 802 UnlockPage(filepage); 2403 shmem_recalc_inode(inode, 0, 0); !! 803 page_cache_release(filepage); 2404 if (folio) { !! 804 } 2405 folio_unlock(folio); << 2406 folio_put(folio); << 2407 } 805 } 2408 return error; 806 return error; 2409 } 807 } 2410 808 2411 /** !! 809 struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int unused) 2412 * shmem_get_folio - find, and lock a shmem f << 2413 * @inode: inode to search << 2414 * @index: the page index. << 2415 * @write_end: end of a write, could extend << 2416 * @foliop: pointer to the folio if found << 2417 * @sgp: SGP_* flags to control behavi << 2418 * << 2419 * Looks up the page cache entry at @inode & << 2420 * present, it is returned locked with an inc << 2421 * << 2422 * If the caller modifies data in the folio, << 2423 * before unlocking the folio to ensure that << 2424 * There is no need to reserve space before c << 2425 * << 2426 * When no folio is found, the behavior depen << 2427 * - for SGP_READ, *@foliop is %NULL and 0 i << 2428 * - for SGP_NOALLOC, *@foliop is %NULL and << 2429 * - for all other flags a new folio is allo << 2430 * page cache and returned locked in @foli << 2431 * << 2432 * Context: May sleep. 
<< 2433 * Return: 0 if successful, else a negative e << 2434 */ << 2435 int shmem_get_folio(struct inode *inode, pgof << 2436 struct folio **foliop, en << 2437 { << 2438 return shmem_get_folio_gfp(inode, ind << 2439 mapping_gfp_mask(inod << 2440 } << 2441 EXPORT_SYMBOL_GPL(shmem_get_folio); << 2442 << 2443 /* << 2444 * This is like autoremove_wake_function, but << 2445 * entry unconditionally - even if something << 2446 * target. << 2447 */ << 2448 static int synchronous_wake_function(wait_que << 2449 unsigned int mode, in << 2450 { << 2451 int ret = default_wake_function(wait, << 2452 list_del_init(&wait->entry); << 2453 return ret; << 2454 } << 2455 << 2456 /* << 2457 * Trinity finds that probing a hole which tm << 2458 * prevent the hole-punch from ever completin << 2459 * locks writers out with its hold on i_rwsem << 2460 * faulting pages into the hole while it's be << 2461 * shmem_undo_range() does remove the additio << 2462 * keep up, as each new page needs its own un << 2463 * and the i_mmap tree grows ever slower to s << 2464 * << 2465 * It does not matter if we sometimes reach t << 2466 * hole-punch begins, so that one fault then << 2467 * we just need to make racing faults a rare << 2468 * << 2469 * The implementation below would be much sim << 2470 * standard mutex or completion: but we canno << 2471 * and bloating every shmem inode for this un << 2472 */ << 2473 static vm_fault_t shmem_falloc_wait(struct vm << 2474 { << 2475 struct shmem_falloc *shmem_falloc; << 2476 struct file *fpin = NULL; << 2477 vm_fault_t ret = 0; << 2478 << 2479 spin_lock(&inode->i_lock); << 2480 shmem_falloc = inode->i_private; << 2481 if (shmem_falloc && << 2482 shmem_falloc->waitq && << 2483 vmf->pgoff >= shmem_falloc->start << 2484 vmf->pgoff < shmem_falloc->next) << 2485 wait_queue_head_t *shmem_fall << 2486 DEFINE_WAIT_FUNC(shmem_fault_ << 2487 << 2488 ret = VM_FAULT_NOPAGE; << 2489 fpin = maybe_unlock_mmap_for_ << 2490 shmem_falloc_waitq = shmem_fa << 2491 prepare_to_wait(shmem_falloc_ << 2492 TASK_UNINTERR << 2493 spin_unlock(&inode->i_lock); << 2494 schedule(); << 2495 << 2496 /* << 2497 * shmem_falloc_waitq points << 2498 * stack of the hole-punching << 2499 * is usually invalid by the << 2500 * finish_wait() does not der << 2501 * though i_lock needed lest << 2502 */ << 2503 spin_lock(&inode->i_lock); << 2504 finish_wait(shmem_falloc_wait << 2505 } << 2506 spin_unlock(&inode->i_lock); << 2507 if (fpin) { << 2508 fput(fpin); << 2509 ret = VM_FAULT_RETRY; << 2510 } << 2511 return ret; << 2512 } << 2513 << 2514 static vm_fault_t shmem_fault(struct vm_fault << 2515 { 810 { 2516 struct inode *inode = file_inode(vmf- !! 811 struct inode *inode = vma->vm_file->f_dentry->d_inode; 2517 gfp_t gfp = mapping_gfp_mask(inode->i !! 812 struct page *page = NULL; 2518 struct folio *folio = NULL; !! 813 unsigned long idx; 2519 vm_fault_t ret = 0; !! 
814 int error; 2520 int err; << 2521 << 2522 /* << 2523 * Trinity finds that probing a hole << 2524 * prevent the hole-punch from ever c << 2525 */ << 2526 if (unlikely(inode->i_private)) { << 2527 ret = shmem_falloc_wait(vmf, << 2528 if (ret) << 2529 return ret; << 2530 } << 2531 << 2532 WARN_ON_ONCE(vmf->page != NULL); << 2533 err = shmem_get_folio_gfp(inode, vmf- << 2534 gfp, vmf, & << 2535 if (err) << 2536 return vmf_error(err); << 2537 if (folio) { << 2538 vmf->page = folio_file_page(f << 2539 ret |= VM_FAULT_LOCKED; << 2540 } << 2541 return ret; << 2542 } << 2543 << 2544 unsigned long shmem_get_unmapped_area(struct << 2545 unsigne << 2546 unsigne << 2547 { << 2548 unsigned long addr; << 2549 unsigned long offset; << 2550 unsigned long inflated_len; << 2551 unsigned long inflated_addr; << 2552 unsigned long inflated_offset; << 2553 unsigned long hpage_size; << 2554 << 2555 if (len > TASK_SIZE) << 2556 return -ENOMEM; << 2557 << 2558 addr = mm_get_unmapped_area(current-> << 2559 flags); << 2560 << 2561 if (!IS_ENABLED(CONFIG_TRANSPARENT_HU << 2562 return addr; << 2563 if (IS_ERR_VALUE(addr)) << 2564 return addr; << 2565 if (addr & ~PAGE_MASK) << 2566 return addr; << 2567 if (addr > TASK_SIZE - len) << 2568 return addr; << 2569 << 2570 if (shmem_huge == SHMEM_HUGE_DENY) << 2571 return addr; << 2572 if (flags & MAP_FIXED) << 2573 return addr; << 2574 /* << 2575 * Our priority is to support MAP_SHA << 2576 * and support MAP_PRIVATE mapped hug << 2577 * But if caller specified an address << 2578 * successfully, respect that as befo << 2579 */ << 2580 if (uaddr == addr) << 2581 return addr; << 2582 << 2583 hpage_size = HPAGE_PMD_SIZE; << 2584 if (shmem_huge != SHMEM_HUGE_FORCE) { << 2585 struct super_block *sb; << 2586 unsigned long __maybe_unused << 2587 int order = 0; << 2588 << 2589 if (file) { << 2590 VM_BUG_ON(file->f_op << 2591 sb = file_inode(file) << 2592 } else { << 2593 /* << 2594 * Called directly fr << 2595 * for "/dev/zero", t << 2596 */ << 2597 if (IS_ERR(shm_mnt)) << 2598 return addr; << 2599 sb = shm_mnt->mnt_sb; << 2600 815 2601 /* !! 816 idx = (address - vma->vm_start) >> PAGE_SHIFT; 2602 * Find the highest m !! 817 idx += vma->vm_pgoff; 2603 * provide a suitable !! 818 idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; 2604 */ << 2605 #ifdef CONFIG_TRANSPARENT_HUGEPAGE << 2606 hpage_orders = READ_O << 2607 hpage_orders |= READ_ << 2608 hpage_orders |= READ_ << 2609 if (SHMEM_SB(sb)->hug << 2610 hpage_orders << 2611 << 2612 if (hpage_orders > 0) << 2613 order = highe << 2614 hpage_size = << 2615 } << 2616 #endif << 2617 } << 2618 if (SHMEM_SB(sb)->huge == SHM << 2619 return addr; << 2620 } << 2621 819 2622 if (len < hpage_size) !! 820 error = shmem_getpage(inode, idx, &page, SGP_CACHE); 2623 return addr; !! 821 if (error) >> 822 return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS; 2624 823 2625 offset = (pgoff << PAGE_SHIFT) & (hpa !! 824 mark_page_accessed(page); 2626 if (offset && offset + len < 2 * hpag !! 825 flush_page_to_ram(page); 2627 return addr; !! 
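/*
 * Illustrative userspace sketch (not part of shmem.c): the fault handler
 * above (shmem_fault, which calls shmem_get_folio_gfp with SGP_CACHE) is
 * what backs an ordinary MAP_SHARED mapping of a tmpfs/memfd file, so two
 * processes touching the same offset see the same shmem page.  Names are
 * arbitrary.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	int fd = memfd_create("shared-demo", 0);
	volatile int *p;

	if (fd < 0 || ftruncate(fd, 4096) < 0)
		return 1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	if (fork() == 0) {
		/* First store faults in the child: shmem_fault() finds or
		 * allocates the folio in the shared page cache. */
		p[0] = 42;
		_exit(0);
	}
	wait(NULL);
	printf("parent reads %d from the shared shmem page\n", p[0]);
	return 0;
}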
826 return page; 2628 if ((addr & (hpage_size - 1)) == offs << 2629 return addr; << 2630 << 2631 inflated_len = len + hpage_size - PAG << 2632 if (inflated_len > TASK_SIZE) << 2633 return addr; << 2634 if (inflated_len < len) << 2635 return addr; << 2636 << 2637 inflated_addr = mm_get_unmapped_area( << 2638 << 2639 if (IS_ERR_VALUE(inflated_addr)) << 2640 return addr; << 2641 if (inflated_addr & ~PAGE_MASK) << 2642 return addr; << 2643 << 2644 inflated_offset = inflated_addr & (hp << 2645 inflated_addr += offset - inflated_of << 2646 if (inflated_offset > offset) << 2647 inflated_addr += hpage_size; << 2648 << 2649 if (inflated_addr > TASK_SIZE - len) << 2650 return addr; << 2651 return inflated_addr; << 2652 } << 2653 << 2654 #ifdef CONFIG_NUMA << 2655 static int shmem_set_policy(struct vm_area_st << 2656 { << 2657 struct inode *inode = file_inode(vma- << 2658 return mpol_set_shared_policy(&SHMEM_ << 2659 } 827 } 2660 828 2661 static struct mempolicy *shmem_get_policy(str !! 829 void shmem_lock(struct file *file, int lock) 2662 uns << 2663 { 830 { 2664 struct inode *inode = file_inode(vma- !! 831 struct inode *inode = file->f_dentry->d_inode; 2665 pgoff_t index; !! 832 struct shmem_inode_info *info = SHMEM_I(inode); 2666 833 2667 /* !! 834 spin_lock(&info->lock); 2668 * Bias interleave by inode number to !! 835 if (lock) 2669 * but this interface is independent !! 836 info->flags |= VM_LOCKED; 2670 * supplies only that bias, letting c !! 837 else 2671 * by page order, as in shmem_get_pgo !! 838 info->flags &= ~VM_LOCKED; 2672 */ !! 839 spin_unlock(&info->lock); 2673 *ilx = inode->i_ino; << 2674 index = ((addr - vma->vm_start) >> PA << 2675 return mpol_shared_policy_lookup(&SHM << 2676 } 840 } 2677 841 2678 static struct mempolicy *shmem_get_pgoff_poli !! 842 static int shmem_mmap(struct file *file, struct vm_area_struct *vma) 2679 pgoff_t index, unsign << 2680 { 843 { 2681 struct mempolicy *mpol; !! 844 struct vm_operations_struct *ops; 2682 !! 845 struct inode *inode = file->f_dentry->d_inode; 2683 /* Bias interleave by inode number to << 2684 *ilx = info->vfs_inode.i_ino + (index << 2685 846 2686 mpol = mpol_shared_policy_lookup(&inf !! 847 ops = &shmem_vm_ops; 2687 return mpol ? mpol : get_task_policy( !! 848 if (!S_ISREG(inode->i_mode)) 2688 } !! 849 return -EACCES; 2689 #else !! 850 UPDATE_ATIME(inode); 2690 static struct mempolicy *shmem_get_pgoff_poli !! 851 vma->vm_ops = ops; 2691 pgoff_t index, unsign !! 852 return 0; 2692 { << 2693 *ilx = 0; << 2694 return NULL; << 2695 } 853 } 2696 #endif /* CONFIG_NUMA */ << 2697 854 2698 int shmem_lock(struct file *file, int lock, s !! 855 static struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev) 2699 { 856 { 2700 struct inode *inode = file_inode(file !! 857 struct inode *inode; 2701 struct shmem_inode_info *info = SHMEM !! 858 struct shmem_inode_info *info; 2702 int retval = -ENOMEM; !! 859 struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 2703 860 2704 /* !! 861 spin_lock(&sbinfo->stat_lock); 2705 * What serializes the accesses to in !! 862 if (!sbinfo->free_inodes) { 2706 * ipc_lock_object() when called from !! 863 spin_unlock(&sbinfo->stat_lock); 2707 * no serialization needed when calle !! 
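/*
 * Illustrative userspace sketch (not part of shmem.c), assuming a NUMA
 * kernel and libnuma headers: mbind() on a MAP_SHARED shmem mapping is
 * routed through the vm_ops->set_policy hook above (shmem_set_policy), so
 * the policy is recorded against the file range rather than only this
 * process's VMA.  The node mask below is an arbitrary example.
 */
#define _GNU_SOURCE
#include <numaif.h>		/* mbind(), MPOL_*; link with -lnuma */
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = 8UL << 20;
	unsigned long nodemask = 0x3;	/* nodes 0 and 1, if present */
	int fd = memfd_create("mpol-demo", 0);
	char *p;

	if (fd < 0 || ftruncate(fd, len) < 0)
		return 1;
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	/* Interleave future shmem allocations for this range across nodes. */
	if (mbind(p, len, MPOL_INTERLEAVE, &nodemask,
		  sizeof(nodemask) * 8, 0) < 0)
		return 1;
	return 0;
}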
864 return NULL; 2708 */ << 2709 if (lock && !(info->flags & VM_LOCKED << 2710 if (!user_shm_lock(inode->i_s << 2711 goto out_nomem; << 2712 info->flags |= VM_LOCKED; << 2713 mapping_set_unevictable(file- << 2714 } << 2715 if (!lock && (info->flags & VM_LOCKED << 2716 user_shm_unlock(inode->i_size << 2717 info->flags &= ~VM_LOCKED; << 2718 mapping_clear_unevictable(fil << 2719 } 865 } 2720 retval = 0; !! 866 sbinfo->free_inodes--; >> 867 spin_unlock(&sbinfo->stat_lock); 2721 868 2722 out_nomem: !! 869 inode = new_inode(sb); 2723 return retval; !! 870 if (inode) { >> 871 inode->i_mode = mode; >> 872 inode->i_uid = current->fsuid; >> 873 inode->i_gid = current->fsgid; >> 874 inode->i_blksize = PAGE_CACHE_SIZE; >> 875 inode->i_blocks = 0; >> 876 inode->i_rdev = NODEV; >> 877 inode->i_mapping->a_ops = &shmem_aops; >> 878 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; >> 879 info = SHMEM_I(inode); >> 880 info->inode = inode; >> 881 spin_lock_init(&info->lock); >> 882 switch (mode & S_IFMT) { >> 883 default: >> 884 init_special_inode(inode, mode, dev); >> 885 break; >> 886 case S_IFREG: >> 887 inode->i_op = &shmem_inode_operations; >> 888 inode->i_fop = &shmem_file_operations; >> 889 spin_lock(&shmem_ilock); >> 890 list_add_tail(&info->list, &shmem_inodes); >> 891 spin_unlock(&shmem_ilock); >> 892 break; >> 893 case S_IFDIR: >> 894 inode->i_nlink++; >> 895 /* Some things misbehave if size == 0 on a directory */ >> 896 inode->i_size = 2 * BOGO_DIRENT_SIZE; >> 897 inode->i_op = &shmem_dir_inode_operations; >> 898 inode->i_fop = &dcache_dir_ops; >> 899 break; >> 900 case S_IFLNK: >> 901 break; >> 902 } >> 903 } >> 904 return inode; 2724 } 905 } 2725 906 2726 static int shmem_mmap(struct file *file, stru !! 907 static int shmem_set_size(struct shmem_sb_info *info, >> 908 unsigned long max_blocks, unsigned long max_inodes) 2727 { 909 { 2728 struct inode *inode = file_inode(file !! 910 int error; 2729 struct shmem_inode_info *info = SHMEM !! 911 unsigned long blocks, inodes; 2730 int ret; << 2731 912 2732 ret = seal_check_write(info->seals, v !! 913 spin_lock(&info->stat_lock); 2733 if (ret) !! 914 blocks = info->max_blocks - info->free_blocks; 2734 return ret; !! 915 inodes = info->max_inodes - info->free_inodes; 2735 !! 916 error = -EINVAL; 2736 file_accessed(file); !! 917 if (max_blocks < blocks) 2737 /* This is anonymous shared memory if !! 918 goto out; 2738 if (inode->i_nlink) !! 919 if (max_inodes < inodes) 2739 vma->vm_ops = &shmem_vm_ops; !! 920 goto out; 2740 else !! 921 error = 0; 2741 vma->vm_ops = &shmem_anon_vm_ !! 922 info->max_blocks = max_blocks; 2742 return 0; !! 923 info->free_blocks = max_blocks - blocks; >> 924 info->max_inodes = max_inodes; >> 925 info->free_inodes = max_inodes - inodes; >> 926 out: >> 927 spin_unlock(&info->stat_lock); >> 928 return error; 2743 } 929 } 2744 930 2745 static int shmem_file_open(struct inode *inod !! 931 #ifdef CONFIG_TMPFS 2746 { << 2747 file->f_mode |= FMODE_CAN_ODIRECT; << 2748 return generic_file_open(inode, file) << 2749 } << 2750 932 2751 #ifdef CONFIG_TMPFS_XATTR !! 933 static struct inode_operations shmem_symlink_inode_operations; 2752 static int shmem_initxattrs(struct inode *, c !! 934 static struct inode_operations shmem_symlink_inline_operations; 2753 935 2754 /* 936 /* 2755 * chattr's fsflags are unrelated to extended !! 937 * tmpfs itself makes no use of generic_file_read, generic_file_mmap 2756 * but tmpfs has chosen to enable them under !! 
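/*
 * Illustrative userspace sketch (not part of shmem.c): SysV shared memory
 * segments are backed by the same shmem inodes, and shmctl(SHM_LOCK)
 * reaches the shmem_lock() above via shmctl_do_lock().  That sets
 * VM_LOCKED in info->flags and marks the mapping unevictable, so
 * shmem_writepage() refuses to push these pages to swap.  May require
 * CAP_IPC_LOCK or a sufficient RLIMIT_MEMLOCK; the 4 MiB size is arbitrary.
 */
#include <sys/ipc.h>
#include <sys/shm.h>
#include <stdio.h>

int main(void)
{
	int id = shmget(IPC_PRIVATE, 4UL << 20, IPC_CREAT | 0600);
	void *p;

	if (id < 0)
		return 1;
	p = shmat(id, NULL, 0);
	if (p == (void *)-1)
		return 1;

	/* Pin the segment's shmem pages in memory (no swap-out). */
	if (shmctl(id, SHM_LOCK, NULL) < 0)
		perror("shmctl(SHM_LOCK)");

	shmdt(p);
	shmctl(id, IPC_RMID, NULL);
	return 0;
}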
938 * or generic_file_write; but shmem_readpage, shmem_prepare_write and >> 939 * shmem_commit_write let a tmpfs file be used below the loop driver, >> 940 * and shmem_readpage lets a tmpfs file be used by sendfile. 2757 */ 941 */ 2758 static void shmem_set_inode_flags(struct inod !! 942 static int 2759 { !! 943 shmem_readpage(struct file *file, struct page *page) 2760 unsigned int i_flags = 0; << 2761 << 2762 if (fsflags & FS_NOATIME_FL) << 2763 i_flags |= S_NOATIME; << 2764 if (fsflags & FS_APPEND_FL) << 2765 i_flags |= S_APPEND; << 2766 if (fsflags & FS_IMMUTABLE_FL) << 2767 i_flags |= S_IMMUTABLE; << 2768 /* << 2769 * But FS_NODUMP_FL does not require << 2770 */ << 2771 inode_set_flags(inode, i_flags, S_NOA << 2772 } << 2773 #else << 2774 static void shmem_set_inode_flags(struct inod << 2775 { 944 { >> 945 struct inode *inode = page->mapping->host; >> 946 int error = shmem_getpage(inode, page->index, &page, SGP_CACHE); >> 947 UnlockPage(page); >> 948 return error; 2776 } 949 } 2777 #define shmem_initxattrs NULL << 2778 #endif << 2779 950 2780 static struct offset_ctx *shmem_get_offset_ct !! 951 static int >> 952 shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) 2781 { 953 { 2782 return &SHMEM_I(inode)->dir_offsets; !! 954 struct inode *inode = page->mapping->host; >> 955 return shmem_getpage(inode, page->index, &page, SGP_WRITE); 2783 } 956 } 2784 957 2785 static struct inode *__shmem_get_inode(struct !! 958 static int 2786 !! 959 shmem_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) 2787 << 2788 << 2789 { 960 { 2790 struct inode *inode; !! 961 struct inode *inode = page->mapping->host; 2791 struct shmem_inode_info *info; !! 962 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 2792 struct shmem_sb_info *sbinfo = SHMEM_ << 2793 ino_t ino; << 2794 int err; << 2795 963 2796 err = shmem_reserve_inode(sb, &ino); !! 964 if (pos > inode->i_size) 2797 if (err) !! 965 inode->i_size = pos; 2798 return ERR_PTR(err); !! 966 SetPageDirty(page); 2799 !! 967 return 0; 2800 inode = new_inode(sb); << 2801 if (!inode) { << 2802 shmem_free_inode(sb, 0); << 2803 return ERR_PTR(-ENOSPC); << 2804 } << 2805 << 2806 inode->i_ino = ino; << 2807 inode_init_owner(idmap, inode, dir, m << 2808 inode->i_blocks = 0; << 2809 simple_inode_init_ts(inode); << 2810 inode->i_generation = get_random_u32( << 2811 info = SHMEM_I(inode); << 2812 memset(info, 0, (char *)inode - (char << 2813 spin_lock_init(&info->lock); << 2814 atomic_set(&info->stop_eviction, 0); << 2815 info->seals = F_SEAL_SEAL; << 2816 info->flags = flags & VM_NORESERVE; << 2817 info->i_crtime = inode_get_mtime(inod << 2818 info->fsflags = (dir == NULL) ? 
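/*
 * Illustrative userspace sketch (not part of shmem.c): the comment above
 * notes that tmpfs exposes chattr-style fsflags.  Assuming the file lives
 * on a tmpfs mount (the path below is an example only), the usual
 * FS_IOC_GETFLAGS / FS_IOC_SETFLAGS ioctls toggle flags such as
 * FS_NOATIME_FL, which shmem_set_inode_flags() translates into S_NOATIME
 * on the inode.
 */
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FS_IOC_*FLAGS, FS_NOATIME_FL */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int flags;
	int fd = open("/tmp/example-on-tmpfs", O_CREAT | O_RDWR, 0600);

	if (fd < 0)
		return 1;
	if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0)
		return 1;
	flags |= FS_NOATIME_FL;		/* equivalent of chattr +A */
	if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
		return 1;
	close(fd);
	return 0;
}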
0 : << 2819 SHMEM_I(dir)->fsflags & SHMEM << 2820 if (info->fsflags) << 2821 shmem_set_inode_flags(inode, << 2822 INIT_LIST_HEAD(&info->shrinklist); << 2823 INIT_LIST_HEAD(&info->swaplist); << 2824 simple_xattrs_init(&info->xattrs); << 2825 cache_no_acl(inode); << 2826 if (sbinfo->noswap) << 2827 mapping_set_unevictable(inode << 2828 mapping_set_large_folios(inode->i_map << 2829 << 2830 switch (mode & S_IFMT) { << 2831 default: << 2832 inode->i_op = &shmem_special_ << 2833 init_special_inode(inode, mod << 2834 break; << 2835 case S_IFREG: << 2836 inode->i_mapping->a_ops = &sh << 2837 inode->i_op = &shmem_inode_op << 2838 inode->i_fop = &shmem_file_op << 2839 mpol_shared_policy_init(&info << 2840 shme << 2841 break; << 2842 case S_IFDIR: << 2843 inc_nlink(inode); << 2844 /* Some things misbehave if s << 2845 inode->i_size = 2 * BOGO_DIRE << 2846 inode->i_op = &shmem_dir_inod << 2847 inode->i_fop = &simple_offset << 2848 simple_offset_init(shmem_get_ << 2849 break; << 2850 case S_IFLNK: << 2851 /* << 2852 * Must not load anything in << 2853 * mpol_free_shared_policy wi << 2854 */ << 2855 mpol_shared_policy_init(&info << 2856 break; << 2857 } << 2858 << 2859 lockdep_annotate_inode_mutex_key(inod << 2860 return inode; << 2861 } 968 } 2862 969 2863 #ifdef CONFIG_TMPFS_QUOTA !! 970 static ssize_t 2864 static struct inode *shmem_get_inode(struct m !! 971 shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) 2865 struct s << 2866 umode_t << 2867 { 972 { 2868 int err; !! 973 struct inode *inode = file->f_dentry->d_inode; 2869 struct inode *inode; !! 974 loff_t pos; 2870 !! 975 unsigned long written; 2871 inode = __shmem_get_inode(idmap, sb, !! 976 ssize_t err; 2872 if (IS_ERR(inode)) << 2873 return inode; << 2874 << 2875 err = dquot_initialize(inode); << 2876 if (err) << 2877 goto errout; << 2878 << 2879 err = dquot_alloc_inode(inode); << 2880 if (err) { << 2881 dquot_drop(inode); << 2882 goto errout; << 2883 } << 2884 return inode; << 2885 << 2886 errout: << 2887 inode->i_flags |= S_NOQUOTA; << 2888 iput(inode); << 2889 return ERR_PTR(err); << 2890 } << 2891 #else << 2892 static inline struct inode *shmem_get_inode(s << 2893 struct s << 2894 umode_t << 2895 { << 2896 return __shmem_get_inode(idmap, sb, d << 2897 } << 2898 #endif /* CONFIG_TMPFS_QUOTA */ << 2899 << 2900 #ifdef CONFIG_USERFAULTFD << 2901 int shmem_mfill_atomic_pte(pmd_t *dst_pmd, << 2902 struct vm_area_str << 2903 unsigned long dst_ << 2904 unsigned long src_ << 2905 uffd_flags_t flags << 2906 struct folio **fol << 2907 { << 2908 struct inode *inode = file_inode(dst_ << 2909 struct shmem_inode_info *info = SHMEM << 2910 struct address_space *mapping = inode << 2911 gfp_t gfp = mapping_gfp_mask(mapping) << 2912 pgoff_t pgoff = linear_page_index(dst << 2913 void *page_kaddr; << 2914 struct folio *folio; << 2915 int ret; << 2916 pgoff_t max_off; << 2917 977 2918 if (shmem_inode_acct_blocks(inode, 1) !! 978 if ((ssize_t) count < 0) 2919 /* !! 979 return -EINVAL; 2920 * We may have got a page, re << 2921 * and now we find ourselves << 2922 * avoid a BUG_ON in our call << 2923 */ << 2924 if (unlikely(*foliop)) { << 2925 folio_put(*foliop); << 2926 *foliop = NULL; << 2927 } << 2928 return -ENOMEM; << 2929 } << 2930 980 2931 if (!*foliop) { !! 981 if (!access_ok(VERIFY_READ, buf, count)) 2932 ret = -ENOMEM; !! 982 return -EFAULT; 2933 folio = shmem_alloc_folio(gfp << 2934 if (!folio) << 2935 goto out_unacct_block << 2936 983 2937 if (uffd_flags_mode_is(flags, !! 
984 down(&inode->i_sem); 2938 page_kaddr = kmap_loc << 2939 /* << 2940 * The read mmap_lock << 2941 * mmap_lock being re << 2942 * possible if a writ << 2943 * << 2944 * process A thread 1 << 2945 * process A thread 2 << 2946 * process B thread 1 << 2947 * process B thread 2 << 2948 * process A thread 1 << 2949 * process B thread 1 << 2950 * << 2951 * Disable page fault << 2952 * and retry the copy << 2953 */ << 2954 pagefault_disable(); << 2955 ret = copy_from_user( << 2956 << 2957 << 2958 pagefault_enable(); << 2959 kunmap_local(page_kad << 2960 << 2961 /* fallback to copy_f << 2962 if (unlikely(ret)) { << 2963 *foliop = fol << 2964 ret = -ENOENT << 2965 /* don't free << 2966 goto out_unac << 2967 } << 2968 985 2969 flush_dcache_folio(fo !! 986 pos = *ppos; 2970 } else { /* ZE !! 987 written = 0; 2971 clear_user_highpage(& << 2972 } << 2973 } else { << 2974 folio = *foliop; << 2975 VM_BUG_ON_FOLIO(folio_test_la << 2976 *foliop = NULL; << 2977 } << 2978 988 2979 VM_BUG_ON(folio_test_locked(folio)); !! 989 err = precheck_file_write(file, inode, &count, &pos); 2980 VM_BUG_ON(folio_test_swapbacked(folio !! 990 if (err || !count) 2981 __folio_set_locked(folio); !! 991 goto out; 2982 __folio_set_swapbacked(folio); << 2983 __folio_mark_uptodate(folio); << 2984 << 2985 ret = -EFAULT; << 2986 max_off = DIV_ROUND_UP(i_size_read(in << 2987 if (unlikely(pgoff >= max_off)) << 2988 goto out_release; << 2989 << 2990 ret = mem_cgroup_charge(folio, dst_vm << 2991 if (ret) << 2992 goto out_release; << 2993 ret = shmem_add_to_page_cache(folio, << 2994 if (ret) << 2995 goto out_release; << 2996 << 2997 ret = mfill_atomic_install_pte(dst_pm << 2998 &folio << 2999 if (ret) << 3000 goto out_delete_from_cache; << 3001 << 3002 shmem_recalc_inode(inode, 1, 0); << 3003 folio_unlock(folio); << 3004 return 0; << 3005 out_delete_from_cache: << 3006 filemap_remove_folio(folio); << 3007 out_release: << 3008 folio_unlock(folio); << 3009 folio_put(folio); << 3010 out_unacct_blocks: << 3011 shmem_inode_unacct_blocks(inode, 1); << 3012 return ret; << 3013 } << 3014 #endif /* CONFIG_USERFAULTFD */ << 3015 992 3016 #ifdef CONFIG_TMPFS !! 993 remove_suid(inode); 3017 static const struct inode_operations shmem_sy !! 994 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 3018 static const struct inode_operations shmem_sh << 3019 995 3020 static int !! 996 do { 3021 shmem_write_begin(struct file *file, struct a !! 997 struct page *page = NULL; 3022 loff_t pos, unsigned !! 998 unsigned long bytes, index, offset; 3023 struct folio **foliop !! 999 char *kaddr; 3024 { !! 1000 int left; 3025 struct inode *inode = mapping->host; << 3026 struct shmem_inode_info *info = SHMEM << 3027 pgoff_t index = pos >> PAGE_SHIFT; << 3028 struct folio *folio; << 3029 int ret = 0; << 3030 << 3031 /* i_rwsem is held by caller */ << 3032 if (unlikely(info->seals & (F_SEAL_GR << 3033 F_SEAL_WRI << 3034 if (info->seals & (F_SEAL_WRI << 3035 return -EPERM; << 3036 if ((info->seals & F_SEAL_GRO << 3037 return -EPERM; << 3038 } << 3039 1001 3040 ret = shmem_get_folio(inode, index, p !! 1002 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ 3041 if (ret) !! 1003 index = pos >> PAGE_CACHE_SHIFT; 3042 return ret; !! 1004 bytes = PAGE_CACHE_SIZE - offset; 3043 !! 1005 if (bytes > count) 3044 if (folio_test_hwpoison(folio) || !! 1006 bytes = count; 3045 (folio_test_large(folio) && folio << 3046 folio_unlock(folio); << 3047 folio_put(folio); << 3048 return -EIO; << 3049 } << 3050 1007 3051 *foliop = folio; !! 1008 /* 3052 return 0; !! 
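/*
 * Illustrative userspace sketch (not part of shmem.c): the seal checks at
 * the top of shmem_write_begin() above are what make a sealed memfd
 * effectively read-only.  Once F_SEAL_WRITE / F_SEAL_GROW / F_SEAL_SHRINK
 * are added, write() and ftruncate() fail with EPERM.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
	int fd = memfd_create("sealed-demo", MFD_ALLOW_SEALING);

	if (fd < 0 || ftruncate(fd, 4096) < 0)
		return 1;
	if (fcntl(fd, F_ADD_SEALS,
		  F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_WRITE) < 0)
		return 1;

	if (write(fd, "x", 1) < 0 && errno == EPERM)
		printf("write blocked by F_SEAL_WRITE, as expected\n");
	if (ftruncate(fd, 8192) < 0 && errno == EPERM)
		printf("grow blocked by F_SEAL_GROW, as expected\n");
	return 0;
}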
1009 * We don't hold page lock across copy from user - 3053 } !! 1010 * what would it guard against? - so no deadlock here. >> 1011 */ 3054 1012 3055 static int !! 1013 err = shmem_getpage(inode, index, &page, SGP_WRITE); 3056 shmem_write_end(struct file *file, struct add !! 1014 if (err) 3057 loff_t pos, unsigned !! 1015 break; 3058 struct folio *folio, << 3059 { << 3060 struct inode *inode = mapping->host; << 3061 << 3062 if (pos + copied > inode->i_size) << 3063 i_size_write(inode, pos + cop << 3064 << 3065 if (!folio_test_uptodate(folio)) { << 3066 if (copied < folio_size(folio << 3067 size_t from = offset_ << 3068 folio_zero_segments(f << 3069 from << 3070 } << 3071 folio_mark_uptodate(folio); << 3072 } << 3073 folio_mark_dirty(folio); << 3074 folio_unlock(folio); << 3075 folio_put(folio); << 3076 1016 3077 return copied; !! 1017 kaddr = kmap(page); >> 1018 left = __copy_from_user(kaddr + offset, buf, bytes); >> 1019 kunmap(page); >> 1020 >> 1021 written += bytes; >> 1022 count -= bytes; >> 1023 pos += bytes; >> 1024 buf += bytes; >> 1025 if (pos > inode->i_size) >> 1026 inode->i_size = pos; >> 1027 >> 1028 flush_dcache_page(page); >> 1029 SetPageDirty(page); >> 1030 SetPageReferenced(page); >> 1031 page_cache_release(page); >> 1032 >> 1033 if (left) { >> 1034 pos -= left; >> 1035 written -= left; >> 1036 err = -EFAULT; >> 1037 break; >> 1038 } >> 1039 } while (count); >> 1040 >> 1041 *ppos = pos; >> 1042 if (written) >> 1043 err = written; >> 1044 out: >> 1045 up(&inode->i_sem); >> 1046 return err; 3078 } 1047 } 3079 1048 3080 static ssize_t shmem_file_read_iter(struct ki !! 1049 static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc) 3081 { 1050 { 3082 struct file *file = iocb->ki_filp; !! 1051 struct inode *inode = filp->f_dentry->d_inode; 3083 struct inode *inode = file_inode(file << 3084 struct address_space *mapping = inode 1052 struct address_space *mapping = inode->i_mapping; 3085 pgoff_t index; !! 1053 unsigned long index, offset; 3086 unsigned long offset; !! 1054 loff_t pos = *ppos; 3087 int error = 0; << 3088 ssize_t retval = 0; << 3089 loff_t *ppos = &iocb->ki_pos; << 3090 1055 3091 index = *ppos >> PAGE_SHIFT; !! 1056 if (unlikely(pos < 0)) 3092 offset = *ppos & ~PAGE_MASK; !! 1057 return; >> 1058 >> 1059 index = pos >> PAGE_CACHE_SHIFT; >> 1060 offset = pos & ~PAGE_CACHE_MASK; 3093 1061 3094 for (;;) { 1062 for (;;) { 3095 struct folio *folio = NULL; << 3096 struct page *page = NULL; 1063 struct page *page = NULL; 3097 pgoff_t end_index; !! 1064 unsigned long end_index, nr, ret; 3098 unsigned long nr, ret; << 3099 loff_t i_size = i_size_read(i << 3100 1065 3101 end_index = i_size >> PAGE_SH !! 1066 end_index = inode->i_size >> PAGE_CACHE_SHIFT; 3102 if (index > end_index) 1067 if (index > end_index) 3103 break; 1068 break; 3104 if (index == end_index) { 1069 if (index == end_index) { 3105 nr = i_size & ~PAGE_M !! 1070 nr = inode->i_size & ~PAGE_CACHE_MASK; 3106 if (nr <= offset) 1071 if (nr <= offset) 3107 break; 1072 break; 3108 } 1073 } 3109 1074 3110 error = shmem_get_folio(inode !! 1075 desc->error = shmem_getpage(inode, index, &page, SGP_READ); 3111 if (error) { !! 1076 if (desc->error) { 3112 if (error == -EINVAL) !! 1077 if (desc->error == -EINVAL) 3113 error = 0; !! 
1078 desc->error = 0; 3114 break; 1079 break; 3115 } 1080 } 3116 if (folio) { << 3117 folio_unlock(folio); << 3118 << 3119 page = folio_file_pag << 3120 if (PageHWPoison(page << 3121 folio_put(fol << 3122 error = -EIO; << 3123 break; << 3124 } << 3125 } << 3126 1081 3127 /* 1082 /* 3128 * We must evaluate after, si 1083 * We must evaluate after, since reads (unlike writes) 3129 * are called without i_rwsem !! 1084 * are called without i_sem protection against truncate 3130 */ 1085 */ 3131 nr = PAGE_SIZE; !! 1086 nr = PAGE_CACHE_SIZE; 3132 i_size = i_size_read(inode); !! 1087 end_index = inode->i_size >> PAGE_CACHE_SHIFT; 3133 end_index = i_size >> PAGE_SH << 3134 if (index == end_index) { 1088 if (index == end_index) { 3135 nr = i_size & ~PAGE_M !! 1089 nr = inode->i_size & ~PAGE_CACHE_MASK; 3136 if (nr <= offset) { 1090 if (nr <= offset) { 3137 if (folio) !! 1091 page_cache_release(page); 3138 folio << 3139 break; 1092 break; 3140 } 1093 } 3141 } 1094 } 3142 nr -= offset; 1095 nr -= offset; 3143 1096 3144 if (folio) { !! 1097 if (page != ZERO_PAGE(0)) { 3145 /* 1098 /* 3146 * If users can be wr 1099 * If users can be writing to this page using arbitrary 3147 * virtual addresses, 1100 * virtual addresses, take care about potential aliasing 3148 * before reading the 1101 * before reading the page on the kernel side. 3149 */ 1102 */ 3150 if (mapping_writably_ !! 1103 if (mapping->i_mmap_shared != NULL) 3151 flush_dcache_ 1104 flush_dcache_page(page); 3152 /* 1105 /* 3153 * Mark the page acce !! 1106 * Mark the page accessed if we read the >> 1107 * beginning or we just did an lseek. 3154 */ 1108 */ 3155 if (!offset) !! 1109 if (!offset || !filp->f_reada) 3156 folio_mark_ac !! 1110 mark_page_accessed(page); 3157 /* << 3158 * Ok, we have the pa << 3159 * now we can copy it << 3160 */ << 3161 ret = copy_page_to_it << 3162 folio_put(folio); << 3163 << 3164 } else if (user_backed_iter(t << 3165 /* << 3166 * Copy to user tends << 3167 * clear_user() not s << 3168 * faster to copy the << 3169 */ << 3170 ret = copy_page_to_it << 3171 } else { << 3172 /* << 3173 * But submitting the << 3174 * splice() - or othe << 3175 * so don't attempt t << 3176 */ << 3177 ret = iov_iter_zero(n << 3178 } 1111 } 3179 1112 3180 retval += ret; !! 1113 /* >> 1114 * Ok, we have the page, and it's up-to-date, so >> 1115 * now we can copy it to user space... >> 1116 * >> 1117 * The actor routine returns how many bytes were actually used.. >> 1118 * NOTE! This may not be the same as how much of a user buffer >> 1119 * we filled up (we may be padding etc), so we can only update >> 1120 * "pos" here (the actor routine has to update the user buffer >> 1121 * pointers and the remaining count). >> 1122 */ >> 1123 ret = file_read_actor(desc, page, offset, nr); 3181 offset += ret; 1124 offset += ret; 3182 index += offset >> PAGE_SHIFT !! 1125 index += offset >> PAGE_CACHE_SHIFT; 3183 offset &= ~PAGE_MASK; !! 1126 offset &= ~PAGE_CACHE_MASK; 3184 1127 3185 if (!iov_iter_count(to)) !! 1128 page_cache_release(page); 3186 break; !! 1129 if (ret != nr || !desc->count) 3187 if (ret < nr) { << 3188 error = -EFAULT; << 3189 break; 1130 break; 3190 } << 3191 cond_resched(); << 3192 } 1131 } 3193 1132 3194 *ppos = ((loff_t) index << PAGE_SHIFT !! 1133 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; 3195 file_accessed(file); !! 1134 filp->f_reada = 1; 3196 return retval ? retval : error; !! 
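/*
 * Illustrative userspace sketch (not part of shmem.c): the read loop above
 * gets no folio back for a hole (SGP_READ) and simply zero-fills the user
 * buffer with iov_iter_zero(); the older 2.4 code in this comparison used
 * ZERO_PAGE(0) for the same case.  Reading a sparse tmpfs/memfd file
 * therefore allocates nothing, which st_blocks makes visible.
 */
#define _GNU_SOURCE
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	char buf[4096];
	struct stat st;
	int fd = memfd_create("sparse-demo", 0);

	if (fd < 0 || ftruncate(fd, 1UL << 20) < 0)
		return 1;

	/* Read a page from the middle of the entirely unwritten file. */
	if (pread(fd, buf, sizeof(buf), 64 * 4096) != (ssize_t)sizeof(buf))
		return 1;

	fstat(fd, &st);
	printf("st_blocks after reading a hole: %lld\n",
	       (long long)st.st_blocks);	/* stays 0: no pages allocated */
	return 0;
}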
1135 UPDATE_ATIME(inode); 3197 } << 3198 << 3199 static ssize_t shmem_file_write_iter(struct k << 3200 { << 3201 struct file *file = iocb->ki_filp; << 3202 struct inode *inode = file->f_mapping << 3203 ssize_t ret; << 3204 << 3205 inode_lock(inode); << 3206 ret = generic_write_checks(iocb, from << 3207 if (ret <= 0) << 3208 goto unlock; << 3209 ret = file_remove_privs(file); << 3210 if (ret) << 3211 goto unlock; << 3212 ret = file_update_time(file); << 3213 if (ret) << 3214 goto unlock; << 3215 ret = generic_perform_write(iocb, fro << 3216 unlock: << 3217 inode_unlock(inode); << 3218 return ret; << 3219 } 1136 } 3220 1137 3221 static bool zero_pipe_buf_get(struct pipe_ino !! 1138 static ssize_t shmem_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) 3222 struct pipe_buf << 3223 { 1139 { 3224 return true; !! 1140 read_descriptor_t desc; 3225 } << 3226 1141 3227 static void zero_pipe_buf_release(struct pipe !! 1142 if ((ssize_t) count < 0) 3228 struct pipe !! 1143 return -EINVAL; 3229 { !! 1144 if (!access_ok(VERIFY_WRITE, buf, count)) 3230 } !! 1145 return -EFAULT; >> 1146 if (!count) >> 1147 return 0; 3231 1148 3232 static bool zero_pipe_buf_try_steal(struct pi !! 1149 desc.written = 0; 3233 struct pi !! 1150 desc.count = count; 3234 { !! 1151 desc.buf = buf; 3235 return false; !! 1152 desc.error = 0; >> 1153 >> 1154 do_shmem_file_read(filp, ppos, &desc); >> 1155 if (desc.written) >> 1156 return desc.written; >> 1157 return desc.error; 3236 } 1158 } 3237 1159 3238 static const struct pipe_buf_operations zero_ !! 1160 static int shmem_statfs(struct super_block *sb, struct statfs *buf) 3239 .release = zero_pipe_buf_relea << 3240 .try_steal = zero_pipe_buf_try_s << 3241 .get = zero_pipe_buf_get, << 3242 }; << 3243 << 3244 static size_t splice_zeropage_into_pipe(struc << 3245 loff_ << 3246 { 1161 { 3247 size_t offset = fpos & ~PAGE_MASK; !! 1162 struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 3248 << 3249 size = min_t(size_t, size, PAGE_SIZE << 3250 << 3251 if (!pipe_full(pipe->head, pipe->tail << 3252 struct pipe_buffer *buf = pip << 3253 << 3254 *buf = (struct pipe_buffer) { << 3255 .ops = &zero_pipe_ << 3256 .page = ZERO_PAGE(0 << 3257 .offset = offset, << 3258 .len = size, << 3259 }; << 3260 pipe->head++; << 3261 } << 3262 1163 3263 return size; !! 1164 buf->f_type = TMPFS_MAGIC; >> 1165 buf->f_bsize = PAGE_CACHE_SIZE; >> 1166 spin_lock(&sbinfo->stat_lock); >> 1167 buf->f_blocks = sbinfo->max_blocks; >> 1168 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; >> 1169 buf->f_files = sbinfo->max_inodes; >> 1170 buf->f_ffree = sbinfo->free_inodes; >> 1171 spin_unlock(&sbinfo->stat_lock); >> 1172 buf->f_namelen = NAME_MAX; >> 1173 return 0; 3264 } 1174 } 3265 1175 3266 static ssize_t shmem_file_splice_read(struct !! 1176 /* 3267 struct !! 1177 * Retaining negative dentries for an in-memory filesystem just wastes 3268 size_t !! 1178 * memory and lookup time: arrange for them to be deleted immediately. >> 1179 */ >> 1180 static int shmem_delete_dentry(struct dentry *dentry) 3269 { 1181 { 3270 struct inode *inode = file_inode(in); !! 
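/*
 * Editor's sketch (illustration only, not part of shmem.c): the splice
 * machinery above feeds page cache folios -- or the zero page for holes --
 * straight into a pipe.  From user space that is splice(2) with a tmpfs
 * file as the source; /dev/shm is an assumed tmpfs mount point.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/dev/shm/splice-demo";
        const char msg[] = "spliced out of tmpfs\n";
        char out[64] = "";
        int pipefd[2];
        loff_t off = 0;
        int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0600);

        if (fd < 0 || pipe(pipefd) < 0 ||
            pwrite(fd, msg, sizeof(msg) - 1, 0) != (ssize_t)(sizeof(msg) - 1))
                return perror("setup"), 1;

        /* file -> pipe, no copy through a user buffer */
        if (splice(fd, &off, pipefd[1], NULL, sizeof(msg) - 1, 0) < 0)
                return perror("splice"), 1;

        read(pipefd[0], out, sizeof(out) - 1);
        fputs(out, stdout);
        unlink(path);
        return 0;
}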
1182 return 1; 3271 struct address_space *mapping = inode << 3272 struct folio *folio = NULL; << 3273 size_t total_spliced = 0, used, npage << 3274 loff_t isize; << 3275 int error = 0; << 3276 << 3277 /* Work out how much data we can actu << 3278 used = pipe_occupancy(pipe->head, pip << 3279 npages = max_t(ssize_t, pipe->max_usa << 3280 len = min_t(size_t, len, npages * PAG << 3281 << 3282 do { << 3283 if (*ppos >= i_size_read(inod << 3284 break; << 3285 << 3286 error = shmem_get_folio(inode << 3287 SGP_R << 3288 if (error) { << 3289 if (error == -EINVAL) << 3290 error = 0; << 3291 break; << 3292 } << 3293 if (folio) { << 3294 folio_unlock(folio); << 3295 << 3296 if (folio_test_hwpois << 3297 (folio_test_large << 3298 folio_test_has_h << 3299 error = -EIO; << 3300 break; << 3301 } << 3302 } << 3303 << 3304 /* << 3305 * i_size must be checked aft << 3306 * << 3307 * Checking i_size after the << 3308 * the correct value for "nr" << 3309 * part of the page is not co << 3310 * another truncate extends t << 3311 */ << 3312 isize = i_size_read(inode); << 3313 if (unlikely(*ppos >= isize)) << 3314 break; << 3315 part = min_t(loff_t, isize - << 3316 << 3317 if (folio) { << 3318 /* << 3319 * If users can be wr << 3320 * virtual addresses, << 3321 * before reading the << 3322 */ << 3323 if (mapping_writably_ << 3324 flush_dcache_ << 3325 folio_mark_accessed(f << 3326 /* << 3327 * Ok, we have the pa << 3328 * now splice it into << 3329 */ << 3330 n = splice_folio_into << 3331 folio_put(folio); << 3332 folio = NULL; << 3333 } else { << 3334 n = splice_zeropage_i << 3335 } << 3336 << 3337 if (!n) << 3338 break; << 3339 len -= n; << 3340 total_spliced += n; << 3341 *ppos += n; << 3342 in->f_ra.prev_pos = *ppos; << 3343 if (pipe_full(pipe->head, pip << 3344 break; << 3345 << 3346 cond_resched(); << 3347 } while (len); << 3348 << 3349 if (folio) << 3350 folio_put(folio); << 3351 << 3352 file_accessed(in); << 3353 return total_spliced ? total_spliced << 3354 } 1183 } 3355 1184 3356 static loff_t shmem_file_llseek(struct file * !! 1185 /* >> 1186 * Lookup the data. This is trivial - if the dentry didn't already >> 1187 * exist, we know it is negative. Set d_op to delete negative dentries. >> 1188 */ >> 1189 static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry) 3357 { 1190 { 3358 struct address_space *mapping = file- !! 1191 static struct dentry_operations shmem_dentry_operations = { 3359 struct inode *inode = mapping->host; !! 1192 .d_delete = shmem_delete_dentry, 3360 !! 1193 }; 3361 if (whence != SEEK_DATA && whence != << 3362 return generic_file_llseek_si << 3363 MAX_L << 3364 if (offset < 0) << 3365 return -ENXIO; << 3366 1194 3367 inode_lock(inode); !! 1195 if (dentry->d_name.len > NAME_MAX) 3368 /* We're holding i_rwsem so we can ac !! 1196 return ERR_PTR(-ENAMETOOLONG); 3369 offset = mapping_seek_hole_data(mappi !! 1197 dentry->d_op = &shmem_dentry_operations; 3370 if (offset >= 0) !! 1198 d_add(dentry, NULL); 3371 offset = vfs_setpos(file, off !! 1199 return NULL; 3372 inode_unlock(inode); << 3373 return offset; << 3374 } 1200 } 3375 1201 3376 static long shmem_fallocate(struct file *file !! 1202 /* 3377 !! 1203 * File creation. Allocate an inode, and we're done.. >> 1204 */ >> 1205 static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev) 3378 { 1206 { 3379 struct inode *inode = file_inode(file !! 1207 struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev); 3380 struct shmem_sb_info *sbinfo = SHMEM_ !! 
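/*
 * Editor's sketch (illustration only, not part of shmem.c): the llseek
 * handler above implements SEEK_DATA/SEEK_HOLE by scanning the page cache,
 * so offsets come back at page granularity.  Assumes a tmpfs at /dev/shm.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/dev/shm/seek-demo";
        char page[4096] = { 1 };
        int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0600);

        if (fd < 0)
                return perror("open"), 1;
        ftruncate(fd, 1 << 20);                         /* 1 MiB hole */
        pwrite(fd, page, sizeof(page), 64 * 1024);      /* one data page at 64 KiB */

        off_t data = lseek(fd, 0, SEEK_DATA);
        off_t hole = lseek(fd, data, SEEK_HOLE);
        printf("first data at %lld, next hole at %lld\n",
               (long long)data, (long long)hole);
        unlink(path);
        return 0;
}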
1208 int error = -ENOSPC; 3381 struct shmem_inode_info *info = SHMEM << 3382 struct shmem_falloc shmem_falloc; << 3383 pgoff_t start, index, end, undo_fallo << 3384 int error; << 3385 1209 3386 if (mode & ~(FALLOC_FL_KEEP_SIZE | FA !! 1210 if (inode) { 3387 return -EOPNOTSUPP; !! 1211 if (dir->i_mode & S_ISGID) { 3388 !! 1212 inode->i_gid = dir->i_gid; 3389 inode_lock(inode); !! 1213 if (S_ISDIR(mode)) 3390 !! 1214 inode->i_mode |= S_ISGID; 3391 if (mode & FALLOC_FL_PUNCH_HOLE) { !! 1215 } 3392 struct address_space *mapping !! 1216 dir->i_size += BOGO_DIRENT_SIZE; 3393 loff_t unmap_start = round_up !! 1217 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 3394 loff_t unmap_end = round_down !! 1218 d_instantiate(dentry, inode); 3395 DECLARE_WAIT_QUEUE_HEAD_ONSTA !! 1219 dget(dentry); /* Extra count - pin the dentry in core */ 3396 << 3397 /* protected by i_rwsem */ << 3398 if (info->seals & (F_SEAL_WRI << 3399 error = -EPERM; << 3400 goto out; << 3401 } << 3402 << 3403 shmem_falloc.waitq = &shmem_f << 3404 shmem_falloc.start = (u64)unm << 3405 shmem_falloc.next = (unmap_en << 3406 spin_lock(&inode->i_lock); << 3407 inode->i_private = &shmem_fal << 3408 spin_unlock(&inode->i_lock); << 3409 << 3410 if ((u64)unmap_end > (u64)unm << 3411 unmap_mapping_range(m << 3412 1 << 3413 shmem_truncate_range(inode, o << 3414 /* No need to unmap again: ho << 3415 << 3416 spin_lock(&inode->i_lock); << 3417 inode->i_private = NULL; << 3418 wake_up_all(&shmem_falloc_wai << 3419 WARN_ON_ONCE(!list_empty(&shm << 3420 spin_unlock(&inode->i_lock); << 3421 error = 0; 1220 error = 0; 3422 goto out; << 3423 } << 3424 << 3425 /* We need to check rlimit even when << 3426 error = inode_newsize_ok(inode, offse << 3427 if (error) << 3428 goto out; << 3429 << 3430 if ((info->seals & F_SEAL_GROW) && of << 3431 error = -EPERM; << 3432 goto out; << 3433 } << 3434 << 3435 start = offset >> PAGE_SHIFT; << 3436 end = (offset + len + PAGE_SIZE - 1) << 3437 /* Try to avoid a swapstorm if len is << 3438 if (sbinfo->max_blocks && end - start << 3439 error = -ENOSPC; << 3440 goto out; << 3441 } << 3442 << 3443 shmem_falloc.waitq = NULL; << 3444 shmem_falloc.start = start; << 3445 shmem_falloc.next = start; << 3446 shmem_falloc.nr_falloced = 0; << 3447 shmem_falloc.nr_unswapped = 0; << 3448 spin_lock(&inode->i_lock); << 3449 inode->i_private = &shmem_falloc; << 3450 spin_unlock(&inode->i_lock); << 3451 << 3452 /* << 3453 * info->fallocend is only relevant w << 3454 * involved: to prevent split_huge_pa << 3455 * pages when FALLOC_FL_KEEP_SIZE com << 3456 */ << 3457 undo_fallocend = info->fallocend; << 3458 if (info->fallocend < end) << 3459 info->fallocend = end; << 3460 << 3461 for (index = start; index < end; ) { << 3462 struct folio *folio; << 3463 << 3464 /* << 3465 * Check for fatal signal so << 3466 * situations. We don't want << 3467 * signals as large fallocate << 3468 * e.g. periodic timers may r << 3469 * restarting. 
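/*
 * Editor's sketch (illustration only, not part of shmem.c): the punch-hole
 * branch above unmaps and truncates the affected range, so the backing
 * pages are given back while i_size is preserved (KEEP_SIZE is mandatory
 * with PUNCH_HOLE).  The /dev/shm path is an assumption about the tmpfs
 * mount point.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

static long used_blocks(int fd)
{
        struct stat st;

        fstat(fd, &st);
        return (long)st.st_blocks;
}

int main(void)
{
        const char *path = "/dev/shm/punch-demo";
        char buf[4096];
        int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0600);

        if (fd < 0)
                return perror("open"), 1;
        memset(buf, 0xaa, sizeof(buf));
        for (int i = 0; i < 64; i++)            /* 256 KiB of real data */
                pwrite(fd, buf, sizeof(buf), (off_t)i * sizeof(buf));
        printf("blocks before punch: %ld\n", used_blocks(fd));

        /* free the first 128 KiB, keep the file size */
        if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                      0, 128 * 1024) < 0)
                return perror("fallocate"), 1;
        printf("blocks after punch:  %ld\n", used_blocks(fd));
        unlink(path);
        return 0;
}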
<< 3470 */ << 3471 if (fatal_signal_pending(curr << 3472 error = -EINTR; << 3473 else if (shmem_falloc.nr_unsw << 3474 error = -ENOMEM; << 3475 else << 3476 error = shmem_get_fol << 3477 << 3478 if (error) { << 3479 info->fallocend = und << 3480 /* Remove the !uptoda << 3481 if (index > start) { << 3482 shmem_undo_ra << 3483 (loff_t)s << 3484 ((loff_t) << 3485 } << 3486 goto undone; << 3487 } << 3488 << 3489 /* << 3490 * Here is a more important o << 3491 * a second SGP_FALLOC on the << 3492 * making it uptodate and un- << 3493 */ << 3494 index = folio_next_index(foli << 3495 /* Beware 32-bit wraparound * << 3496 if (!index) << 3497 index--; << 3498 << 3499 /* << 3500 * Inform shmem_writepage() h << 3501 * No need for lock or barrie << 3502 */ << 3503 if (!folio_test_uptodate(foli << 3504 shmem_falloc.nr_fallo << 3505 shmem_falloc.next = index; << 3506 << 3507 /* << 3508 * If !uptodate, leave it tha << 3509 * can be recognized if we ne << 3510 * But mark it dirty so that << 3511 * than free the folios we ar << 3512 * might still be clean: we n << 3513 */ << 3514 folio_mark_dirty(folio); << 3515 folio_unlock(folio); << 3516 folio_put(folio); << 3517 cond_resched(); << 3518 } 1221 } 3519 << 3520 if (!(mode & FALLOC_FL_KEEP_SIZE) && << 3521 i_size_write(inode, offset + << 3522 undone: << 3523 spin_lock(&inode->i_lock); << 3524 inode->i_private = NULL; << 3525 spin_unlock(&inode->i_lock); << 3526 out: << 3527 if (!error) << 3528 file_modified(file); << 3529 inode_unlock(inode); << 3530 return error; 1222 return error; 3531 } 1223 } 3532 1224 3533 static int shmem_statfs(struct dentry *dentry !! 1225 static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode) 3534 { 1226 { 3535 struct shmem_sb_info *sbinfo = SHMEM_ !! 1227 int error; 3536 << 3537 buf->f_type = TMPFS_MAGIC; << 3538 buf->f_bsize = PAGE_SIZE; << 3539 buf->f_namelen = NAME_MAX; << 3540 if (sbinfo->max_blocks) { << 3541 buf->f_blocks = sbinfo->max_b << 3542 buf->f_bavail = << 3543 buf->f_bfree = sbinfo->max_b << 3544 percpu_counte << 3545 } << 3546 if (sbinfo->max_inodes) { << 3547 buf->f_files = sbinfo->max_in << 3548 buf->f_ffree = sbinfo->free_i << 3549 } << 3550 /* else leave those fields 0 like sim << 3551 << 3552 buf->f_fsid = uuid_to_fsid(dentry->d_ << 3553 1228 >> 1229 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0))) >> 1230 return error; >> 1231 dir->i_nlink++; 3554 return 0; 1232 return 0; 3555 } 1233 } 3556 1234 >> 1235 static int shmem_create(struct inode *dir, struct dentry *dentry, int mode) >> 1236 { >> 1237 return shmem_mknod(dir, dentry, mode | S_IFREG, 0); >> 1238 } >> 1239 3557 /* 1240 /* 3558 * File creation. Allocate an inode, and we'r !! 1241 * Link a file.. 3559 */ 1242 */ 3560 static int !! 1243 static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 3561 shmem_mknod(struct mnt_idmap *idmap, struct i << 3562 struct dentry *dentry, umode_t mo << 3563 { 1244 { 3564 struct inode *inode; !! 1245 struct inode *inode = old_dentry->d_inode; 3565 int error; << 3566 << 3567 inode = shmem_get_inode(idmap, dir->i << 3568 if (IS_ERR(inode)) << 3569 return PTR_ERR(inode); << 3570 << 3571 error = simple_acl_create(dir, inode) << 3572 if (error) << 3573 goto out_iput; << 3574 error = security_inode_init_security( << 3575 << 3576 if (error && error != -EOPNOTSUPP) << 3577 goto out_iput; << 3578 1246 3579 error = simple_offset_add(shmem_get_o !! 1247 if (S_ISDIR(inode->i_mode)) 3580 if (error) !! 
1248 return -EPERM; 3581 goto out_iput; << 3582 1249 3583 dir->i_size += BOGO_DIRENT_SIZE; 1250 dir->i_size += BOGO_DIRENT_SIZE; 3584 inode_set_mtime_to_ts(dir, inode_set_ !! 1251 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 3585 inode_inc_iversion(dir); !! 1252 inode->i_nlink++; >> 1253 atomic_inc(&inode->i_count); /* New dentry reference */ >> 1254 dget(dentry); /* Extra pinning count for the created dentry */ 3586 d_instantiate(dentry, inode); 1255 d_instantiate(dentry, inode); 3587 dget(dentry); /* Extra count - pin th << 3588 return error; << 3589 << 3590 out_iput: << 3591 iput(inode); << 3592 return error; << 3593 } << 3594 << 3595 static int << 3596 shmem_tmpfile(struct mnt_idmap *idmap, struct << 3597 struct file *file, umode_t mode << 3598 { << 3599 struct inode *inode; << 3600 int error; << 3601 << 3602 inode = shmem_get_inode(idmap, dir->i << 3603 if (IS_ERR(inode)) { << 3604 error = PTR_ERR(inode); << 3605 goto err_out; << 3606 } << 3607 error = security_inode_init_security( << 3608 << 3609 if (error && error != -EOPNOTSUPP) << 3610 goto out_iput; << 3611 error = simple_acl_create(dir, inode) << 3612 if (error) << 3613 goto out_iput; << 3614 d_tmpfile(file, inode); << 3615 << 3616 err_out: << 3617 return finish_open_simple(file, error << 3618 out_iput: << 3619 iput(inode); << 3620 return error; << 3621 } << 3622 << 3623 static int shmem_mkdir(struct mnt_idmap *idma << 3624 struct dentry *dentry, << 3625 { << 3626 int error; << 3627 << 3628 error = shmem_mknod(idmap, dir, dentr << 3629 if (error) << 3630 return error; << 3631 inc_nlink(dir); << 3632 return 0; 1256 return 0; 3633 } 1257 } 3634 1258 3635 static int shmem_create(struct mnt_idmap *idm !! 1259 static inline int shmem_positive(struct dentry *dentry) 3636 struct dentry *dentry << 3637 { 1260 { 3638 return shmem_mknod(idmap, dir, dentry !! 1261 return dentry->d_inode && !d_unhashed(dentry); 3639 } 1262 } 3640 1263 3641 /* 1264 /* 3642 * Link a file.. !! 1265 * Check that a directory is empty (this works >> 1266 * for regular files too, they'll just always be >> 1267 * considered empty..). >> 1268 * >> 1269 * Note that an empty directory can still have >> 1270 * children, they just all have to be negative.. 3643 */ 1271 */ 3644 static int shmem_link(struct dentry *old_dent !! 1272 static int shmem_empty(struct dentry *dentry) 3645 struct dentry *dentry) << 3646 { 1273 { 3647 struct inode *inode = d_inode(old_den !! 1274 struct list_head *list; 3648 int ret = 0; << 3649 1275 3650 /* !! 1276 spin_lock(&dcache_lock); 3651 * No ordinary (disk based) filesyste !! 1277 list = dentry->d_subdirs.next; 3652 * but each new link needs a new dent << 3653 * tmpfs dentries cannot be pruned un << 3654 * But if an O_TMPFILE file is linked << 3655 * first link must skip that, to get << 3656 */ << 3657 if (inode->i_nlink) { << 3658 ret = shmem_reserve_inode(ino << 3659 if (ret) << 3660 goto out; << 3661 } << 3662 1278 3663 ret = simple_offset_add(shmem_get_off !! 1279 while (list != &dentry->d_subdirs) { 3664 if (ret) { !! 1280 struct dentry *de = list_entry(list, struct dentry, d_child); 3665 if (inode->i_nlink) << 3666 shmem_free_inode(inod << 3667 goto out; << 3668 } << 3669 1281 3670 dir->i_size += BOGO_DIRENT_SIZE; !! 1282 if (shmem_positive(de)) { 3671 inode_set_mtime_to_ts(dir, !! 1283 spin_unlock(&dcache_lock); 3672 inode_set_ctime !! 1284 return 0; 3673 inode_inc_iversion(dir); !! 1285 } 3674 inc_nlink(inode); !! 1286 list = list->next; 3675 ihold(inode); /* New dentry referen !! 
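/*
 * Editor's sketch (illustration only, not part of shmem.c): shmem_tmpfile()
 * above backs O_TMPFILE, and the i_nlink check in shmem_link() is what lets
 * such an unnamed inode be linked in later.  The /proc/self/fd re-link is
 * the documented userspace idiom; /dev/shm is an assumed tmpfs mount.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char procpath[64];
        int fd = open("/dev/shm", O_TMPFILE | O_RDWR, 0600); /* unnamed inode */

        if (fd < 0)
                return perror("O_TMPFILE"), 1;
        write(fd, "hello\n", 6);

        /* give the anonymous inode a name; on tmpfs this reaches shmem_link() */
        snprintf(procpath, sizeof(procpath), "/proc/self/fd/%d", fd);
        if (linkat(AT_FDCWD, procpath, AT_FDCWD, "/dev/shm/tmpfile-demo",
                   AT_SYMLINK_FOLLOW) < 0)
                return perror("linkat"), 1;
        puts("anonymous tmpfs file linked in as /dev/shm/tmpfile-demo");
        unlink("/dev/shm/tmpfile-demo");
        return 0;
}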
1287 } 3676 dget(dentry); /* Extra pinning coun !! 1288 spin_unlock(&dcache_lock); 3677 d_instantiate(dentry, inode); !! 1289 return 1; 3678 out: << 3679 return ret; << 3680 } 1290 } 3681 1291 3682 static int shmem_unlink(struct inode *dir, st 1292 static int shmem_unlink(struct inode *dir, struct dentry *dentry) 3683 { 1293 { 3684 struct inode *inode = d_inode(dentry) !! 1294 struct inode *inode = dentry->d_inode; 3685 << 3686 if (inode->i_nlink > 1 && !S_ISDIR(in << 3687 shmem_free_inode(inode->i_sb, << 3688 << 3689 simple_offset_remove(shmem_get_offset << 3690 1295 3691 dir->i_size -= BOGO_DIRENT_SIZE; 1296 dir->i_size -= BOGO_DIRENT_SIZE; 3692 inode_set_mtime_to_ts(dir, !! 1297 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 3693 inode_set_ctime !! 1298 inode->i_nlink--; 3694 inode_inc_iversion(dir); !! 1299 dput(dentry); /* Undo the count from "create" - this does all the work */ 3695 drop_nlink(inode); << 3696 dput(dentry); /* Undo the count fro << 3697 return 0; 1300 return 0; 3698 } 1301 } 3699 1302 3700 static int shmem_rmdir(struct inode *dir, str 1303 static int shmem_rmdir(struct inode *dir, struct dentry *dentry) 3701 { 1304 { 3702 if (!simple_offset_empty(dentry)) !! 1305 if (!shmem_empty(dentry)) 3703 return -ENOTEMPTY; 1306 return -ENOTEMPTY; 3704 1307 3705 drop_nlink(d_inode(dentry)); !! 1308 dir->i_nlink--; 3706 drop_nlink(dir); << 3707 return shmem_unlink(dir, dentry); 1309 return shmem_unlink(dir, dentry); 3708 } 1310 } 3709 1311 3710 static int shmem_whiteout(struct mnt_idmap *i << 3711 struct inode *old_d << 3712 { << 3713 struct dentry *whiteout; << 3714 int error; << 3715 << 3716 whiteout = d_alloc(old_dentry->d_pare << 3717 if (!whiteout) << 3718 return -ENOMEM; << 3719 << 3720 error = shmem_mknod(idmap, old_dir, w << 3721 S_IFCHR | WHITEOU << 3722 dput(whiteout); << 3723 if (error) << 3724 return error; << 3725 << 3726 /* << 3727 * Cheat and hash the whiteout while << 3728 * place, instead of playing games wi << 3729 * << 3730 * d_lookup() will consistently find << 3731 * not sure which one, but that isn't << 3732 */ << 3733 d_rehash(whiteout); << 3734 return 0; << 3735 } << 3736 << 3737 /* 1312 /* 3738 * The VFS layer already does all the dentry 1313 * The VFS layer already does all the dentry stuff for rename, 3739 * we just have to decrement the usage count 1314 * we just have to decrement the usage count for the target if 3740 * it exists so that the VFS layer correctly 1315 * it exists so that the VFS layer correctly free's it when it 3741 * gets overwritten. 1316 * gets overwritten. 3742 */ 1317 */ 3743 static int shmem_rename2(struct mnt_idmap *id !! 1318 static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) 3744 struct inode *old_di << 3745 struct inode *new_di << 3746 unsigned int flags) << 3747 { 1319 { 3748 struct inode *inode = d_inode(old_den !! 1320 struct inode *inode = old_dentry->d_inode; 3749 int they_are_dirs = S_ISDIR(inode->i_ 1321 int they_are_dirs = S_ISDIR(inode->i_mode); 3750 int error; << 3751 << 3752 if (flags & ~(RENAME_NOREPLACE | RENA << 3753 return -EINVAL; << 3754 1322 3755 if (flags & RENAME_EXCHANGE) !! 1323 if (!shmem_empty(new_dentry)) 3756 return simple_offset_rename_e << 3757 << 3758 << 3759 if (!simple_offset_empty(new_dentry)) << 3760 return -ENOTEMPTY; 1324 return -ENOTEMPTY; 3761 1325 3762 if (flags & RENAME_WHITEOUT) { !! 
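/*
 * Editor's sketch (illustration only, not part of shmem.c): the rename
 * handler above accepts RENAME_NOREPLACE, RENAME_EXCHANGE and
 * RENAME_WHITEOUT.  renameat2() needs a reasonably recent glibc (or a raw
 * syscall); the /dev/shm paths are an assumption about the tmpfs mount.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#ifndef RENAME_EXCHANGE
#define RENAME_EXCHANGE (1 << 1)        /* uapi value, for older headers */
#endif

int main(void)
{
        if (creat("/dev/shm/a", 0600) < 0 || creat("/dev/shm/b", 0600) < 0)
                return perror("creat"), 1;

        /* atomically swap the two names: the RENAME_EXCHANGE branch above */
        if (renameat2(AT_FDCWD, "/dev/shm/a", AT_FDCWD, "/dev/shm/b",
                      RENAME_EXCHANGE) < 0)
                return perror("renameat2"), 1;
        puts("swapped /dev/shm/a and /dev/shm/b atomically");
        unlink("/dev/shm/a");
        unlink("/dev/shm/b");
        return 0;
}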
1326 if (new_dentry->d_inode) { 3763 error = shmem_whiteout(idmap, << 3764 if (error) << 3765 return error; << 3766 } << 3767 << 3768 error = simple_offset_rename(old_dir, << 3769 if (error) << 3770 return error; << 3771 << 3772 if (d_really_is_positive(new_dentry)) << 3773 (void) shmem_unlink(new_dir, 1327 (void) shmem_unlink(new_dir, new_dentry); 3774 if (they_are_dirs) { !! 1328 if (they_are_dirs) 3775 drop_nlink(d_inode(ne !! 1329 old_dir->i_nlink--; 3776 drop_nlink(old_dir); << 3777 } << 3778 } else if (they_are_dirs) { 1330 } else if (they_are_dirs) { 3779 drop_nlink(old_dir); !! 1331 old_dir->i_nlink--; 3780 inc_nlink(new_dir); !! 1332 new_dir->i_nlink++; 3781 } 1333 } 3782 1334 3783 old_dir->i_size -= BOGO_DIRENT_SIZE; 1335 old_dir->i_size -= BOGO_DIRENT_SIZE; 3784 new_dir->i_size += BOGO_DIRENT_SIZE; 1336 new_dir->i_size += BOGO_DIRENT_SIZE; 3785 simple_rename_timestamp(old_dir, old_ !! 1337 old_dir->i_ctime = old_dir->i_mtime = 3786 inode_inc_iversion(old_dir); !! 1338 new_dir->i_ctime = new_dir->i_mtime = 3787 inode_inc_iversion(new_dir); !! 1339 inode->i_ctime = CURRENT_TIME; 3788 return 0; 1340 return 0; 3789 } 1341 } 3790 1342 3791 static int shmem_symlink(struct mnt_idmap *id !! 1343 static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname) 3792 struct dentry *dentr << 3793 { 1344 { 3794 int error; 1345 int error; 3795 int len; 1346 int len; 3796 struct inode *inode; 1347 struct inode *inode; 3797 struct folio *folio; !! 1348 struct page *page = NULL; >> 1349 char *kaddr; >> 1350 struct shmem_inode_info *info; 3798 1351 3799 len = strlen(symname) + 1; 1352 len = strlen(symname) + 1; 3800 if (len > PAGE_SIZE) !! 1353 if (len > PAGE_CACHE_SIZE) 3801 return -ENAMETOOLONG; 1354 return -ENAMETOOLONG; 3802 1355 3803 inode = shmem_get_inode(idmap, dir->i !! 1356 inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); 3804 VM_NORESERVE) !! 1357 if (!inode) 3805 if (IS_ERR(inode)) !! 1358 return -ENOSPC; 3806 return PTR_ERR(inode); << 3807 << 3808 error = security_inode_init_security( << 3809 << 3810 if (error && error != -EOPNOTSUPP) << 3811 goto out_iput; << 3812 << 3813 error = simple_offset_add(shmem_get_o << 3814 if (error) << 3815 goto out_iput; << 3816 1359 >> 1360 info = SHMEM_I(inode); 3817 inode->i_size = len-1; 1361 inode->i_size = len-1; 3818 if (len <= SHORT_SYMLINK_LEN) { !! 1362 if (len <= sizeof(struct shmem_inode_info)) { 3819 inode->i_link = kmemdup(symna !! 1363 /* do it inline */ 3820 if (!inode->i_link) { !! 1364 memcpy(info, symname, len); 3821 error = -ENOMEM; !! 1365 inode->i_op = &shmem_symlink_inline_operations; 3822 goto out_remove_offse << 3823 } << 3824 inode->i_op = &shmem_short_sy << 3825 } else { 1366 } else { 3826 inode_nohighmem(inode); !! 1367 error = shmem_getpage(inode, 0, &page, SGP_WRITE); 3827 inode->i_mapping->a_ops = &sh !! 1368 if (error) { 3828 error = shmem_get_folio(inode !! 1369 iput(inode); 3829 if (error) !! 1370 return error; 3830 goto out_remove_offse !! 1371 } 3831 inode->i_op = &shmem_symlink_ 1372 inode->i_op = &shmem_symlink_inode_operations; 3832 memcpy(folio_address(folio), !! 1373 spin_lock(&shmem_ilock); 3833 folio_mark_uptodate(folio); !! 1374 list_add_tail(&info->list, &shmem_inodes); 3834 folio_mark_dirty(folio); !! 1375 spin_unlock(&shmem_ilock); 3835 folio_unlock(folio); !! 1376 kaddr = kmap(page); 3836 folio_put(folio); !! 
1377 memcpy(kaddr, symname, len); >> 1378 kunmap(page); >> 1379 SetPageDirty(page); >> 1380 page_cache_release(page); 3837 } 1381 } >> 1382 if (dir->i_mode & S_ISGID) >> 1383 inode->i_gid = dir->i_gid; 3838 dir->i_size += BOGO_DIRENT_SIZE; 1384 dir->i_size += BOGO_DIRENT_SIZE; 3839 inode_set_mtime_to_ts(dir, inode_set_ !! 1385 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 3840 inode_inc_iversion(dir); << 3841 d_instantiate(dentry, inode); 1386 d_instantiate(dentry, inode); 3842 dget(dentry); 1387 dget(dentry); 3843 return 0; 1388 return 0; 3844 << 3845 out_remove_offset: << 3846 simple_offset_remove(shmem_get_offset << 3847 out_iput: << 3848 iput(inode); << 3849 return error; << 3850 } << 3851 << 3852 static void shmem_put_link(void *arg) << 3853 { << 3854 folio_mark_accessed(arg); << 3855 folio_put(arg); << 3856 } 1389 } 3857 1390 3858 static const char *shmem_get_link(struct dent !! 1391 static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen) 3859 struct dela << 3860 { 1392 { 3861 struct folio *folio = NULL; !! 1393 return vfs_readlink(dentry, buffer, buflen, (const char *)SHMEM_I(dentry->d_inode)); 3862 int error; << 3863 << 3864 if (!dentry) { << 3865 folio = filemap_get_folio(ino << 3866 if (IS_ERR(folio)) << 3867 return ERR_PTR(-ECHIL << 3868 if (PageHWPoison(folio_page(f << 3869 !folio_test_uptodate(foli << 3870 folio_put(folio); << 3871 return ERR_PTR(-ECHIL << 3872 } << 3873 } else { << 3874 error = shmem_get_folio(inode << 3875 if (error) << 3876 return ERR_PTR(error) << 3877 if (!folio) << 3878 return ERR_PTR(-ECHIL << 3879 if (PageHWPoison(folio_page(f << 3880 folio_unlock(folio); << 3881 folio_put(folio); << 3882 return ERR_PTR(-ECHIL << 3883 } << 3884 folio_unlock(folio); << 3885 } << 3886 set_delayed_call(done, shmem_put_link << 3887 return folio_address(folio); << 3888 } 1394 } 3889 1395 3890 #ifdef CONFIG_TMPFS_XATTR !! 1396 static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) 3891 << 3892 static int shmem_fileattr_get(struct dentry * << 3893 { 1397 { 3894 struct shmem_inode_info *info = SHMEM !! 1398 return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode)); 3895 << 3896 fileattr_fill_flags(fa, info->fsflags << 3897 << 3898 return 0; << 3899 } 1399 } 3900 1400 3901 static int shmem_fileattr_set(struct mnt_idma !! 1401 static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen) 3902 struct dentry * << 3903 { 1402 { 3904 struct inode *inode = d_inode(dentry) !! 1403 struct page *page = NULL; 3905 struct shmem_inode_info *info = SHMEM !! 1404 int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ); 3906 !! 1405 if (res) 3907 if (fileattr_has_fsx(fa)) !! 1406 return res; 3908 return -EOPNOTSUPP; !! 1407 res = vfs_readlink(dentry, buffer, buflen, kmap(page)); 3909 if (fa->flags & ~SHMEM_FL_USER_MODIFI !! 1408 kunmap(page); 3910 return -EOPNOTSUPP; !! 1409 mark_page_accessed(page); 3911 !! 1410 page_cache_release(page); 3912 info->fsflags = (info->fsflags & ~SHM !! 1411 return res; 3913 (fa->flags & SHMEM_FL_USER_MO << 3914 << 3915 shmem_set_inode_flags(inode, info->fs << 3916 inode_set_ctime_current(inode); << 3917 inode_inc_iversion(inode); << 3918 return 0; << 3919 } << 3920 << 3921 /* << 3922 * Superblocks without xattr inode operations << 3923 * support from the LSM "for free". As soon a << 3924 * like ACLs, we also need to implement the s << 3925 * filesystem level, though. 
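/*
 * Editor's sketch (illustration only, not part of shmem.c): targets no
 * longer than SHORT_SYMLINK_LEN are kept inline in the inode, longer ones
 * get a page cache page of their own -- either way readlink(2) behaves the
 * same.  /dev/shm is an assumed tmpfs mount point.
 */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[256];
        ssize_t n;

        if (symlink("short-target", "/dev/shm/link-demo") < 0)
                return perror("symlink"), 1;
        n = readlink("/dev/shm/link-demo", buf, sizeof(buf) - 1);
        if (n < 0)
                return perror("readlink"), 1;
        buf[n] = '\0';
        printf("link resolves to \"%s\"\n", buf);
        unlink("/dev/shm/link-demo");
        return 0;
}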
<< 3926 */ << 3927 << 3928 /* << 3929 * Callback for security_inode_init_security( << 3930 */ << 3931 static int shmem_initxattrs(struct inode *ino << 3932 const struct xatt << 3933 { << 3934 struct shmem_inode_info *info = SHMEM << 3935 struct shmem_sb_info *sbinfo = SHMEM_ << 3936 const struct xattr *xattr; << 3937 struct simple_xattr *new_xattr; << 3938 size_t ispace = 0; << 3939 size_t len; << 3940 << 3941 if (sbinfo->max_inodes) { << 3942 for (xattr = xattr_array; xat << 3943 ispace += simple_xatt << 3944 xattr->value_ << 3945 } << 3946 if (ispace) { << 3947 raw_spin_lock(&sbinfo << 3948 if (sbinfo->free_ispa << 3949 ispace = 0; << 3950 else << 3951 sbinfo->free_ << 3952 raw_spin_unlock(&sbin << 3953 if (!ispace) << 3954 return -ENOSP << 3955 } << 3956 } << 3957 << 3958 for (xattr = xattr_array; xattr->name << 3959 new_xattr = simple_xattr_allo << 3960 if (!new_xattr) << 3961 break; << 3962 << 3963 len = strlen(xattr->name) + 1 << 3964 new_xattr->name = kmalloc(XAT << 3965 GFP << 3966 if (!new_xattr->name) { << 3967 kvfree(new_xattr); << 3968 break; << 3969 } << 3970 << 3971 memcpy(new_xattr->name, XATTR << 3972 XATTR_SECURITY_PREFIX_ << 3973 memcpy(new_xattr->name + XATT << 3974 xattr->name, len); << 3975 << 3976 simple_xattr_add(&info->xattr << 3977 } << 3978 << 3979 if (xattr->name != NULL) { << 3980 if (ispace) { << 3981 raw_spin_lock(&sbinfo << 3982 sbinfo->free_ispace + << 3983 raw_spin_unlock(&sbin << 3984 } << 3985 simple_xattrs_free(&info->xat << 3986 return -ENOMEM; << 3987 } << 3988 << 3989 return 0; << 3990 } << 3991 << 3992 static int shmem_xattr_handler_get(const stru << 3993 struct den << 3994 const char << 3995 { << 3996 struct shmem_inode_info *info = SHMEM << 3997 << 3998 name = xattr_full_name(handler, name) << 3999 return simple_xattr_get(&info->xattrs << 4000 } << 4001 << 4002 static int shmem_xattr_handler_set(const stru << 4003 struct mnt << 4004 struct den << 4005 const char << 4006 size_t siz << 4007 { << 4008 struct shmem_inode_info *info = SHMEM << 4009 struct shmem_sb_info *sbinfo = SHMEM_ << 4010 struct simple_xattr *old_xattr; << 4011 size_t ispace = 0; << 4012 << 4013 name = xattr_full_name(handler, name) << 4014 if (value && sbinfo->max_inodes) { << 4015 ispace = simple_xattr_space(n << 4016 raw_spin_lock(&sbinfo->stat_l << 4017 if (sbinfo->free_ispace < isp << 4018 ispace = 0; << 4019 else << 4020 sbinfo->free_ispace - << 4021 raw_spin_unlock(&sbinfo->stat << 4022 if (!ispace) << 4023 return -ENOSPC; << 4024 } << 4025 << 4026 old_xattr = simple_xattr_set(&info->x << 4027 if (!IS_ERR(old_xattr)) { << 4028 ispace = 0; << 4029 if (old_xattr && sbinfo->max_ << 4030 ispace = simple_xattr << 4031 << 4032 simple_xattr_free(old_xattr); << 4033 old_xattr = NULL; << 4034 inode_set_ctime_current(inode << 4035 inode_inc_iversion(inode); << 4036 } << 4037 if (ispace) { << 4038 raw_spin_lock(&sbinfo->stat_l << 4039 sbinfo->free_ispace += ispace << 4040 raw_spin_unlock(&sbinfo->stat << 4041 } << 4042 return PTR_ERR(old_xattr); << 4043 } << 4044 << 4045 static const struct xattr_handler shmem_secur << 4046 .prefix = XATTR_SECURITY_PREFIX, << 4047 .get = shmem_xattr_handler_get, << 4048 .set = shmem_xattr_handler_set, << 4049 }; << 4050 << 4051 static const struct xattr_handler shmem_trust << 4052 .prefix = XATTR_TRUSTED_PREFIX, << 4053 .get = shmem_xattr_handler_get, << 4054 .set = shmem_xattr_handler_set, << 4055 }; << 4056 << 4057 static const struct xattr_handler shmem_user_ << 4058 .prefix = XATTR_USER_PREFIX, << 4059 .get = shmem_xattr_handler_get, << 4060 .set = 
shmem_xattr_handler_set, << 4061 }; << 4062 << 4063 static const struct xattr_handler * const shm << 4064 &shmem_security_xattr_handler, << 4065 &shmem_trusted_xattr_handler, << 4066 &shmem_user_xattr_handler, << 4067 NULL << 4068 }; << 4069 << 4070 static ssize_t shmem_listxattr(struct dentry << 4071 { << 4072 struct shmem_inode_info *info = SHMEM << 4073 return simple_xattr_list(d_inode(dent << 4074 } << 4075 #endif /* CONFIG_TMPFS_XATTR */ << 4076 << 4077 static const struct inode_operations shmem_sh << 4078 .getattr = shmem_getattr, << 4079 .setattr = shmem_setattr, << 4080 .get_link = simple_get_link, << 4081 #ifdef CONFIG_TMPFS_XATTR << 4082 .listxattr = shmem_listxattr, << 4083 #endif << 4084 }; << 4085 << 4086 static const struct inode_operations shmem_sy << 4087 .getattr = shmem_getattr, << 4088 .setattr = shmem_setattr, << 4089 .get_link = shmem_get_link, << 4090 #ifdef CONFIG_TMPFS_XATTR << 4091 .listxattr = shmem_listxattr, << 4092 #endif << 4093 }; << 4094 << 4095 static struct dentry *shmem_get_parent(struct << 4096 { << 4097 return ERR_PTR(-ESTALE); << 4098 } << 4099 << 4100 static int shmem_match(struct inode *ino, voi << 4101 { << 4102 __u32 *fh = vfh; << 4103 __u64 inum = fh[2]; << 4104 inum = (inum << 32) | fh[1]; << 4105 return ino->i_ino == inum && fh[0] == << 4106 } << 4107 << 4108 /* Find any alias of inode, but prefer a hash << 4109 static struct dentry *shmem_find_alias(struct << 4110 { << 4111 struct dentry *alias = d_find_alias(i << 4112 << 4113 return alias ?: d_find_any_alias(inod << 4114 } << 4115 << 4116 static struct dentry *shmem_fh_to_dentry(stru << 4117 struct fid *fid, int fh_len, << 4118 { << 4119 struct inode *inode; << 4120 struct dentry *dentry = NULL; << 4121 u64 inum; << 4122 << 4123 if (fh_len < 3) << 4124 return NULL; << 4125 << 4126 inum = fid->raw[2]; << 4127 inum = (inum << 32) | fid->raw[1]; << 4128 << 4129 inode = ilookup5(sb, (unsigned long)( << 4130 shmem_match, fid->raw << 4131 if (inode) { << 4132 dentry = shmem_find_alias(ino << 4133 iput(inode); << 4134 } << 4135 << 4136 return dentry; << 4137 } 1412 } 4138 1413 4139 static int shmem_encode_fh(struct inode *inod !! 1414 static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd) 4140 struct inode << 4141 { 1415 { 4142 if (*len < 3) { !! 1416 struct page *page = NULL; 4143 *len = 3; !! 1417 int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ); 4144 return FILEID_INVALID; !! 1418 if (res) 4145 } !! 1419 return res; 4146 !! 1420 res = vfs_follow_link(nd, kmap(page)); 4147 if (inode_unhashed(inode)) { !! 1421 kunmap(page); 4148 /* Unfortunately insert_inode !! 1422 mark_page_accessed(page); 4149 * so as we hash inodes here !! 1423 page_cache_release(page); 4150 * time, we need a lock to en !! 1424 return res; 4151 * to do it once << 4152 */ << 4153 static DEFINE_SPINLOCK(lock); << 4154 spin_lock(&lock); << 4155 if (inode_unhashed(inode)) << 4156 __insert_inode_hash(i << 4157 i << 4158 spin_unlock(&lock); << 4159 } << 4160 << 4161 fh[0] = inode->i_generation; << 4162 fh[1] = inode->i_ino; << 4163 fh[2] = ((__u64)inode->i_ino) >> 32; << 4164 << 4165 *len = 3; << 4166 return 1; << 4167 } 1425 } 4168 1426 4169 static const struct export_operations shmem_e !! 1427 static struct inode_operations shmem_symlink_inline_operations = { 4170 .get_parent = shmem_get_parent, !! 1428 readlink: shmem_readlink_inline, 4171 .encode_fh = shmem_encode_fh, !! 
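/*
 * Editor's sketch (illustration only, not part of shmem.c): because tmpfs
 * provides export_operations, name_to_handle_at(2) works on it; the handle
 * bytes are produced by shmem_encode_fh() (generation plus the 64-bit
 * inode number).  Decoding with open_by_handle_at() would additionally
 * need CAP_DAC_READ_SEARCH.  /dev/shm is an assumed tmpfs mount.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        struct file_handle *fh = malloc(sizeof(*fh) + 128);
        int mount_id;

        if (!fh)
                return 1;
        fh->handle_bytes = 128;                 /* plenty for the 3-word handle */
        if (name_to_handle_at(AT_FDCWD, "/dev/shm", fh, &mount_id, 0) < 0)
                return perror("name_to_handle_at"), 1;
        printf("handle type %d, %u bytes, mount id %d\n",
               fh->handle_type, fh->handle_bytes, mount_id);
        free(fh);
        return 0;
}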
1429 follow_link: shmem_follow_link_inline, 4172 .fh_to_dentry = shmem_fh_to_dentry, << 4173 }; << 4174 << 4175 enum shmem_param { << 4176 Opt_gid, << 4177 Opt_huge, << 4178 Opt_mode, << 4179 Opt_mpol, << 4180 Opt_nr_blocks, << 4181 Opt_nr_inodes, << 4182 Opt_size, << 4183 Opt_uid, << 4184 Opt_inode32, << 4185 Opt_inode64, << 4186 Opt_noswap, << 4187 Opt_quota, << 4188 Opt_usrquota, << 4189 Opt_grpquota, << 4190 Opt_usrquota_block_hardlimit, << 4191 Opt_usrquota_inode_hardlimit, << 4192 Opt_grpquota_block_hardlimit, << 4193 Opt_grpquota_inode_hardlimit, << 4194 }; 1430 }; 4195 1431 4196 static const struct constant_table shmem_para !! 1432 static struct inode_operations shmem_symlink_inode_operations = { 4197 {"never", SHMEM_HUGE_NEVER }, !! 1433 truncate: shmem_truncate, 4198 {"always", SHMEM_HUGE_ALWAYS }, !! 1434 readlink: shmem_readlink, 4199 {"within_size", SHMEM_HUGE_WITHIN_SIZ !! 1435 follow_link: shmem_follow_link, 4200 {"advise", SHMEM_HUGE_ADVISE }, << 4201 {} << 4202 }; 1436 }; 4203 1437 4204 const struct fs_parameter_spec shmem_fs_param !! 1438 static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes) 4205 fsparam_gid ("gid", Opt_g << 4206 fsparam_enum ("huge", Opt_h << 4207 fsparam_u32oct("mode", Opt_m << 4208 fsparam_string("mpol", Opt_m << 4209 fsparam_string("nr_blocks", Opt_n << 4210 fsparam_string("nr_inodes", Opt_n << 4211 fsparam_string("size", Opt_s << 4212 fsparam_uid ("uid", Opt_u << 4213 fsparam_flag ("inode32", Opt_i << 4214 fsparam_flag ("inode64", Opt_i << 4215 fsparam_flag ("noswap", Opt_n << 4216 #ifdef CONFIG_TMPFS_QUOTA << 4217 fsparam_flag ("quota", Opt_q << 4218 fsparam_flag ("usrquota", Opt_u << 4219 fsparam_flag ("grpquota", Opt_g << 4220 fsparam_string("usrquota_block_hardli << 4221 fsparam_string("usrquota_inode_hardli << 4222 fsparam_string("grpquota_block_hardli << 4223 fsparam_string("grpquota_inode_hardli << 4224 #endif << 4225 {} << 4226 }; << 4227 << 4228 static int shmem_parse_one(struct fs_context << 4229 { 1439 { 4230 struct shmem_options *ctx = fc->fs_pr !! 1440 char *this_char, *value, *rest; 4231 struct fs_parse_result result; << 4232 unsigned long long size; << 4233 char *rest; << 4234 int opt; << 4235 kuid_t kuid; << 4236 kgid_t kgid; << 4237 << 4238 opt = fs_parse(fc, shmem_fs_parameter << 4239 if (opt < 0) << 4240 return opt; << 4241 << 4242 switch (opt) { << 4243 case Opt_size: << 4244 size = memparse(param->string << 4245 if (*rest == '%') { << 4246 size <<= PAGE_SHIFT; << 4247 size *= totalram_page << 4248 do_div(size, 100); << 4249 rest++; << 4250 } << 4251 if (*rest) << 4252 goto bad_value; << 4253 ctx->blocks = DIV_ROUND_UP(si << 4254 ctx->seen |= SHMEM_SEEN_BLOCK << 4255 break; << 4256 case Opt_nr_blocks: << 4257 ctx->blocks = memparse(param- << 4258 if (*rest || ctx->blocks > LO << 4259 goto bad_value; << 4260 ctx->seen |= SHMEM_SEEN_BLOCK << 4261 break; << 4262 case Opt_nr_inodes: << 4263 ctx->inodes = memparse(param- << 4264 if (*rest || ctx->inodes > UL << 4265 goto bad_value; << 4266 ctx->seen |= SHMEM_SEEN_INODE << 4267 break; << 4268 case Opt_mode: << 4269 ctx->mode = result.uint_32 & << 4270 break; << 4271 case Opt_uid: << 4272 kuid = result.uid; << 4273 << 4274 /* << 4275 * The requested uid must be << 4276 * filesystem's idmapping. 
<< 4277 */ << 4278 if (!kuid_has_mapping(fc->use << 4279 goto bad_value; << 4280 << 4281 ctx->uid = kuid; << 4282 break; << 4283 case Opt_gid: << 4284 kgid = result.gid; << 4285 << 4286 /* << 4287 * The requested gid must be << 4288 * filesystem's idmapping. << 4289 */ << 4290 if (!kgid_has_mapping(fc->use << 4291 goto bad_value; << 4292 << 4293 ctx->gid = kgid; << 4294 break; << 4295 case Opt_huge: << 4296 ctx->huge = result.uint_32; << 4297 if (ctx->huge != SHMEM_HUGE_N << 4298 !(IS_ENABLED(CONFIG_TRANS << 4299 has_transparent_hugepag << 4300 goto unsupported_para << 4301 ctx->seen |= SHMEM_SEEN_HUGE; << 4302 break; << 4303 case Opt_mpol: << 4304 if (IS_ENABLED(CONFIG_NUMA)) << 4305 mpol_put(ctx->mpol); << 4306 ctx->mpol = NULL; << 4307 if (mpol_parse_str(pa << 4308 goto bad_valu << 4309 break; << 4310 } << 4311 goto unsupported_parameter; << 4312 case Opt_inode32: << 4313 ctx->full_inums = false; << 4314 ctx->seen |= SHMEM_SEEN_INUMS << 4315 break; << 4316 case Opt_inode64: << 4317 if (sizeof(ino_t) < 8) { << 4318 return invalfc(fc, << 4319 "Canno << 4320 } << 4321 ctx->full_inums = true; << 4322 ctx->seen |= SHMEM_SEEN_INUMS << 4323 break; << 4324 case Opt_noswap: << 4325 if ((fc->user_ns != &init_use << 4326 return invalfc(fc, << 4327 "Turni << 4328 } << 4329 ctx->noswap = true; << 4330 ctx->seen |= SHMEM_SEEN_NOSWA << 4331 break; << 4332 case Opt_quota: << 4333 if (fc->user_ns != &init_user << 4334 return invalfc(fc, "Q << 4335 ctx->seen |= SHMEM_SEEN_QUOTA << 4336 ctx->quota_types |= (QTYPE_MA << 4337 break; << 4338 case Opt_usrquota: << 4339 if (fc->user_ns != &init_user << 4340 return invalfc(fc, "Q << 4341 ctx->seen |= SHMEM_SEEN_QUOTA << 4342 ctx->quota_types |= QTYPE_MAS << 4343 break; << 4344 case Opt_grpquota: << 4345 if (fc->user_ns != &init_user << 4346 return invalfc(fc, "Q << 4347 ctx->seen |= SHMEM_SEEN_QUOTA << 4348 ctx->quota_types |= QTYPE_MAS << 4349 break; << 4350 case Opt_usrquota_block_hardlimit: << 4351 size = memparse(param->string << 4352 if (*rest || !size) << 4353 goto bad_value; << 4354 if (size > SHMEM_QUOTA_MAX_SP << 4355 return invalfc(fc, << 4356 "User << 4357 ctx->qlimits.usrquota_bhardli << 4358 break; << 4359 case Opt_grpquota_block_hardlimit: << 4360 size = memparse(param->string << 4361 if (*rest || !size) << 4362 goto bad_value; << 4363 if (size > SHMEM_QUOTA_MAX_SP << 4364 return invalfc(fc, << 4365 "Group << 4366 ctx->qlimits.grpquota_bhardli << 4367 break; << 4368 case Opt_usrquota_inode_hardlimit: << 4369 size = memparse(param->string << 4370 if (*rest || !size) << 4371 goto bad_value; << 4372 if (size > SHMEM_QUOTA_MAX_IN << 4373 return invalfc(fc, << 4374 "User << 4375 ctx->qlimits.usrquota_ihardli << 4376 break; << 4377 case Opt_grpquota_inode_hardlimit: << 4378 size = memparse(param->string << 4379 if (*rest || !size) << 4380 goto bad_value; << 4381 if (size > SHMEM_QUOTA_MAX_IN << 4382 return invalfc(fc, << 4383 "Group << 4384 ctx->qlimits.grpquota_ihardli << 4385 break; << 4386 } << 4387 return 0; << 4388 << 4389 unsupported_parameter: << 4390 return invalfc(fc, "Unsupported param << 4391 bad_value: << 4392 return invalfc(fc, "Bad value for '%s << 4393 } << 4394 << 4395 static int shmem_parse_options(struct fs_cont << 4396 { << 4397 char *options = data; << 4398 << 4399 if (options) { << 4400 int err = security_sb_eat_lsm << 4401 if (err) << 4402 return err; << 4403 } << 4404 1441 4405 while (options != NULL) { !! 1442 while ((this_char = strsep(&options, ",")) != NULL) { 4406 char *this_char = options; !! 
1443 if (!*this_char) 4407 for (;;) { !! 1444 continue; 4408 /* !! 1445 if ((value = strchr(this_char,'=')) != NULL) { 4409 * NUL-terminate this !! 1446 *value++ = 0; 4410 * mount options form !! 1447 } else { 4411 * but mpol's nodelis !! 1448 printk(KERN_ERR 4412 */ !! 1449 "tmpfs: No value for mount option '%s'\n", 4413 options = strchr(opti !! 1450 this_char); 4414 if (options == NULL) !! 1451 return 1; 4415 break; !! 1452 } 4416 options++; !! 1453 4417 if (!isdigit(*options !! 1454 if (!strcmp(this_char,"size")) { 4418 options[-1] = !! 1455 unsigned long long size; 4419 break; !! 1456 size = memparse(value,&rest); 4420 } !! 1457 if (*rest == '%') { 4421 } !! 1458 struct sysinfo si; 4422 if (*this_char) { !! 1459 si_meminfo(&si); 4423 char *value = strchr( !! 1460 size <<= PAGE_SHIFT; 4424 size_t len = 0; !! 1461 size *= si.totalram; 4425 int err; !! 1462 do_div(size, 100); 4426 !! 1463 rest++; 4427 if (value) { << 4428 *value++ = '\ << 4429 len = strlen( << 4430 } 1464 } 4431 err = vfs_parse_fs_st !! 1465 if (*rest) 4432 if (err < 0) !! 1466 goto bad_val; 4433 return err; !! 1467 *blocks = size >> PAGE_CACHE_SHIFT; >> 1468 } else if (!strcmp(this_char,"nr_blocks")) { >> 1469 *blocks = memparse(value,&rest); >> 1470 if (*rest) >> 1471 goto bad_val; >> 1472 } else if (!strcmp(this_char,"nr_inodes")) { >> 1473 *inodes = memparse(value,&rest); >> 1474 if (*rest) >> 1475 goto bad_val; >> 1476 } else if (!strcmp(this_char,"mode")) { >> 1477 if (!mode) >> 1478 continue; >> 1479 *mode = simple_strtoul(value,&rest,8); >> 1480 if (*rest) >> 1481 goto bad_val; >> 1482 } else if (!strcmp(this_char,"uid")) { >> 1483 if (!uid) >> 1484 continue; >> 1485 *uid = simple_strtoul(value,&rest,0); >> 1486 if (*rest) >> 1487 goto bad_val; >> 1488 } else if (!strcmp(this_char,"gid")) { >> 1489 if (!gid) >> 1490 continue; >> 1491 *gid = simple_strtoul(value,&rest,0); >> 1492 if (*rest) >> 1493 goto bad_val; >> 1494 } else { >> 1495 printk(KERN_ERR "tmpfs: Bad mount option %s\n", >> 1496 this_char); >> 1497 return 1; 4434 } 1498 } 4435 } 1499 } 4436 return 0; 1500 return 0; >> 1501 >> 1502 bad_val: >> 1503 printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n", >> 1504 value, this_char); >> 1505 return 1; 4437 } 1506 } 4438 1507 4439 /* !! 1508 static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) 4440 * Reconfigure a shmem filesystem. << 4441 */ << 4442 static int shmem_reconfigure(struct fs_contex << 4443 { 1509 { 4444 struct shmem_options *ctx = fc->fs_pr !! 1510 struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 4445 struct shmem_sb_info *sbinfo = SHMEM_ !! 1511 unsigned long max_blocks = sbinfo->max_blocks; 4446 unsigned long used_isp; !! 
1512 unsigned long max_inodes = sbinfo->max_inodes; 4447 struct mempolicy *mpol = NULL; << 4448 const char *err; << 4449 << 4450 raw_spin_lock(&sbinfo->stat_lock); << 4451 used_isp = sbinfo->max_inodes * BOGO_ << 4452 << 4453 if ((ctx->seen & SHMEM_SEEN_BLOCKS) & << 4454 if (!sbinfo->max_blocks) { << 4455 err = "Cannot retroac << 4456 goto out; << 4457 } << 4458 if (percpu_counter_compare(&s << 4459 ct << 4460 err = "Too small a si << 4461 goto out; << 4462 } << 4463 } << 4464 if ((ctx->seen & SHMEM_SEEN_INODES) & << 4465 if (!sbinfo->max_inodes) { << 4466 err = "Cannot retroac << 4467 goto out; << 4468 } << 4469 if (ctx->inodes * BOGO_INODE_ << 4470 err = "Too few inodes << 4471 goto out; << 4472 } << 4473 } << 4474 << 4475 if ((ctx->seen & SHMEM_SEEN_INUMS) && << 4476 sbinfo->next_ino > UINT_MAX) { << 4477 err = "Current inum too high << 4478 goto out; << 4479 } << 4480 if ((ctx->seen & SHMEM_SEEN_NOSWAP) & << 4481 err = "Cannot disable swap on << 4482 goto out; << 4483 } << 4484 if (!(ctx->seen & SHMEM_SEEN_NOSWAP) << 4485 err = "Cannot enable swap on << 4486 goto out; << 4487 } << 4488 << 4489 if (ctx->seen & SHMEM_SEEN_QUOTA && << 4490 !sb_any_quota_loaded(fc->root->d_ << 4491 err = "Cannot enable quota on << 4492 goto out; << 4493 } << 4494 << 4495 #ifdef CONFIG_TMPFS_QUOTA << 4496 #define CHANGED_LIMIT(name) << 4497 (ctx->qlimits.name## hardlimit && << 4498 (ctx->qlimits.name## hardlimit != sbi << 4499 << 4500 if (CHANGED_LIMIT(usrquota_b) || CHAN << 4501 CHANGED_LIMIT(grpquota_b) || CHAN << 4502 err = "Cannot change global q << 4503 goto out; << 4504 } << 4505 #endif /* CONFIG_TMPFS_QUOTA */ << 4506 << 4507 if (ctx->seen & SHMEM_SEEN_HUGE) << 4508 sbinfo->huge = ctx->huge; << 4509 if (ctx->seen & SHMEM_SEEN_INUMS) << 4510 sbinfo->full_inums = ctx->ful << 4511 if (ctx->seen & SHMEM_SEEN_BLOCKS) << 4512 sbinfo->max_blocks = ctx->bl << 4513 if (ctx->seen & SHMEM_SEEN_INODES) { << 4514 sbinfo->max_inodes = ctx->in << 4515 sbinfo->free_ispace = ctx->in << 4516 } << 4517 << 4518 /* << 4519 * Preserve previous mempolicy unless << 4520 */ << 4521 if (ctx->mpol) { << 4522 mpol = sbinfo->mpol; << 4523 sbinfo->mpol = ctx->mpol; << 4524 ctx->mpol = NULL; << 4525 } << 4526 << 4527 if (ctx->noswap) << 4528 sbinfo->noswap = true; << 4529 1513 4530 raw_spin_unlock(&sbinfo->stat_lock); !! 1514 if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes)) 4531 mpol_put(mpol); !! 1515 return -EINVAL; 4532 return 0; !! 1516 return shmem_set_size(sbinfo, max_blocks, max_inodes); 4533 out: << 4534 raw_spin_unlock(&sbinfo->stat_lock); << 4535 return invalfc(fc, "%s", err); << 4536 } 1517 } 4537 1518 4538 static int shmem_show_options(struct seq_file !! 
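/*
 * Editor's sketch (illustration only, not part of shmem.c): the
 * reconfigure path above is reached through a remount; per its checks,
 * raising size= always works, while shrinking below current usage or
 * retroactively limiting an unlimited mount is refused.  Needs
 * CAP_SYS_ADMIN and an existing tmpfs mount at the hypothetical path below.
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        if (mount(NULL, "/tmp/tmpfs-demo", NULL, MS_REMOUNT, "size=128m") < 0)
                return perror("remount"), 1;
        puts("tmpfs limit changed without disturbing open files");
        return 0;
}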
1519 static int shmem_sync_file(struct file *file, struct dentry *dentry, int datasync) 4539 { 1520 { 4540 struct shmem_sb_info *sbinfo = SHMEM_ << 4541 struct mempolicy *mpol; << 4542 << 4543 if (sbinfo->max_blocks != shmem_defau << 4544 seq_printf(seq, ",size=%luk", << 4545 if (sbinfo->max_inodes != shmem_defau << 4546 seq_printf(seq, ",nr_inodes=% << 4547 if (sbinfo->mode != (0777 | S_ISVTX)) << 4548 seq_printf(seq, ",mode=%03ho" << 4549 if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_ << 4550 seq_printf(seq, ",uid=%u", << 4551 from_kuid_mun << 4552 if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_ << 4553 seq_printf(seq, ",gid=%u", << 4554 from_kgid_mun << 4555 << 4556 /* << 4557 * Showing inode{64,32} might be usef << 4558 * since then people don't have to re << 4559 * /proc/config.gz to confirm 64-bit << 4560 * (which may not even exist if IKCON << 4561 * << 4562 * We hide it when inode64 isn't the << 4563 * inodes, since that probably just m << 4564 * consideration. << 4565 * << 4566 * As such: << 4567 * << 4568 * +------------- << 4569 * | TMPFS_INODE6 << 4570 * +------------------+------------- << 4571 * | full_inums=true | show << 4572 * | full_inums=false | show << 4573 * +------------------+------------- << 4574 * << 4575 */ << 4576 if (IS_ENABLED(CONFIG_TMPFS_INODE64) << 4577 seq_printf(seq, ",inode%d", ( << 4578 #ifdef CONFIG_TRANSPARENT_HUGEPAGE << 4579 /* Rightly or wrongly, show huge moun << 4580 if (sbinfo->huge) << 4581 seq_printf(seq, ",huge=%s", s << 4582 #endif << 4583 mpol = shmem_get_sbmpol(sbinfo); << 4584 shmem_show_mpol(seq, mpol); << 4585 mpol_put(mpol); << 4586 if (sbinfo->noswap) << 4587 seq_printf(seq, ",noswap"); << 4588 #ifdef CONFIG_TMPFS_QUOTA << 4589 if (sb_has_quota_active(root->d_sb, U << 4590 seq_printf(seq, ",usrquota"); << 4591 if (sb_has_quota_active(root->d_sb, G << 4592 seq_printf(seq, ",grpquota"); << 4593 if (sbinfo->qlimits.usrquota_bhardlim << 4594 seq_printf(seq, ",usrquota_bl << 4595 sbinfo->qlimits.us << 4596 if (sbinfo->qlimits.grpquota_bhardlim << 4597 seq_printf(seq, ",grpquota_bl << 4598 sbinfo->qlimits.gr << 4599 if (sbinfo->qlimits.usrquota_ihardlim << 4600 seq_printf(seq, ",usrquota_in << 4601 sbinfo->qlimits.us << 4602 if (sbinfo->qlimits.grpquota_ihardlim << 4603 seq_printf(seq, ",grpquota_in << 4604 sbinfo->qlimits.gr << 4605 #endif << 4606 return 0; 1521 return 0; 4607 } 1522 } 4608 << 4609 #endif /* CONFIG_TMPFS */ << 4610 << 4611 static void shmem_put_super(struct super_bloc << 4612 { << 4613 struct shmem_sb_info *sbinfo = SHMEM_ << 4614 << 4615 #ifdef CONFIG_TMPFS_QUOTA << 4616 shmem_disable_quotas(sb); << 4617 #endif 1523 #endif 4618 free_percpu(sbinfo->ino_batch); << 4619 percpu_counter_destroy(&sbinfo->used_ << 4620 mpol_put(sbinfo->mpol); << 4621 kfree(sbinfo); << 4622 sb->s_fs_info = NULL; << 4623 } << 4624 1524 4625 static int shmem_fill_super(struct super_bloc !! 1525 static struct super_block *shmem_read_super(struct super_block *sb, void *data, int silent) 4626 { 1526 { 4627 struct shmem_options *ctx = fc->fs_pr << 4628 struct inode *inode; 1527 struct inode *inode; 4629 struct shmem_sb_info *sbinfo; !! 1528 struct dentry *root; 4630 int error = -ENOMEM; !! 1529 unsigned long blocks, inodes; 4631 !! 1530 int mode = S_IRWXUGO | S_ISVTX; 4632 /* Round up to L1_CACHE_BYTES to resi !! 1531 uid_t uid = current->fsuid; 4633 sbinfo = kzalloc(max((int)sizeof(stru !! 1532 gid_t gid = current->fsgid; 4634 L1_CACHE_BYTE !! 1533 struct shmem_sb_info *sbinfo = SHMEM_SB(sb); 4635 if (!sbinfo) !! 
1534 struct sysinfo si; 4636 return error; << 4637 << 4638 sb->s_fs_info = sbinfo; << 4639 1535 4640 #ifdef CONFIG_TMPFS << 4641 /* 1536 /* 4642 * Per default we only allow half of 1537 * Per default we only allow half of the physical ram per 4643 * tmpfs instance, limiting inodes to !! 1538 * tmpfs instance 4644 * but the internal instance is left << 4645 */ 1539 */ 4646 if (!(sb->s_flags & SB_KERNMOUNT)) { !! 1540 si_meminfo(&si); 4647 if (!(ctx->seen & SHMEM_SEEN_ !! 1541 blocks = inodes = si.totalram / 2; 4648 ctx->blocks = shmem_d << 4649 if (!(ctx->seen & SHMEM_SEEN_ << 4650 ctx->inodes = shmem_d << 4651 if (!(ctx->seen & SHMEM_SEEN_ << 4652 ctx->full_inums = IS_ << 4653 sbinfo->noswap = ctx->noswap; << 4654 } else { << 4655 sb->s_flags |= SB_NOUSER; << 4656 } << 4657 sb->s_export_op = &shmem_export_ops; << 4658 sb->s_flags |= SB_NOSEC | SB_I_VERSIO << 4659 #else << 4660 sb->s_flags |= SB_NOUSER; << 4661 #endif << 4662 sbinfo->max_blocks = ctx->blocks; << 4663 sbinfo->max_inodes = ctx->inodes; << 4664 sbinfo->free_ispace = sbinfo->max_ino << 4665 if (sb->s_flags & SB_KERNMOUNT) { << 4666 sbinfo->ino_batch = alloc_per << 4667 if (!sbinfo->ino_batch) << 4668 goto failed; << 4669 } << 4670 sbinfo->uid = ctx->uid; << 4671 sbinfo->gid = ctx->gid; << 4672 sbinfo->full_inums = ctx->full_inums; << 4673 sbinfo->mode = ctx->mode; << 4674 sbinfo->huge = ctx->huge; << 4675 sbinfo->mpol = ctx->mpol; << 4676 ctx->mpol = NULL; << 4677 << 4678 raw_spin_lock_init(&sbinfo->stat_lock << 4679 if (percpu_counter_init(&sbinfo->used << 4680 goto failed; << 4681 spin_lock_init(&sbinfo->shrinklist_lo << 4682 INIT_LIST_HEAD(&sbinfo->shrinklist); << 4683 << 4684 sb->s_maxbytes = MAX_LFS_FILESIZE; << 4685 sb->s_blocksize = PAGE_SIZE; << 4686 sb->s_blocksize_bits = PAGE_SHIFT; << 4687 sb->s_magic = TMPFS_MAGIC; << 4688 sb->s_op = &shmem_ops; << 4689 sb->s_time_gran = 1; << 4690 #ifdef CONFIG_TMPFS_XATTR << 4691 sb->s_xattr = shmem_xattr_handlers; << 4692 #endif << 4693 #ifdef CONFIG_TMPFS_POSIX_ACL << 4694 sb->s_flags |= SB_POSIXACL; << 4695 #endif << 4696 uuid_t uuid; << 4697 uuid_gen(&uuid); << 4698 super_set_uuid(sb, uuid.b, sizeof(uui << 4699 << 4700 #ifdef CONFIG_TMPFS_QUOTA << 4701 if (ctx->seen & SHMEM_SEEN_QUOTA) { << 4702 sb->dq_op = &shmem_quota_oper << 4703 sb->s_qcop = &dquot_quotactl_ << 4704 sb->s_quota_types = QTYPE_MAS << 4705 << 4706 /* Copy the default limits fr << 4707 memcpy(&sbinfo->qlimits, &ctx << 4708 sizeof(struct shmem_qu << 4709 << 4710 if (shmem_enable_quotas(sb, c << 4711 goto failed; << 4712 } << 4713 #endif /* CONFIG_TMPFS_QUOTA */ << 4714 << 4715 inode = shmem_get_inode(&nop_mnt_idma << 4716 S_IFDIR | sbi << 4717 if (IS_ERR(inode)) { << 4718 error = PTR_ERR(inode); << 4719 goto failed; << 4720 } << 4721 inode->i_uid = sbinfo->uid; << 4722 inode->i_gid = sbinfo->gid; << 4723 sb->s_root = d_make_root(inode); << 4724 if (!sb->s_root) << 4725 goto failed; << 4726 return 0; << 4727 << 4728 failed: << 4729 shmem_put_super(sb); << 4730 return error; << 4731 } << 4732 << 4733 static int shmem_get_tree(struct fs_context * << 4734 { << 4735 return get_tree_nodev(fc, shmem_fill_ << 4736 } << 4737 << 4738 static void shmem_free_fc(struct fs_context * << 4739 { << 4740 struct shmem_options *ctx = fc->fs_pr << 4741 1542 4742 if (ctx) { << 4743 mpol_put(ctx->mpol); << 4744 kfree(ctx); << 4745 } << 4746 } << 4747 << 4748 static const struct fs_context_operations shm << 4749 .free = shmem_free_ << 4750 .get_tree = shmem_get_t << 4751 #ifdef CONFIG_TMPFS 1543 #ifdef CONFIG_TMPFS 4752 
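/*
 * Editor's sketch (illustration only, not part of shmem.c): both the old
 * and the new code default an unsized mount to half of physical RAM; the
 * same figure can be computed from userspace with sysinfo(2).
 */
#include <stdio.h>
#include <sys/sysinfo.h>

int main(void)
{
        struct sysinfo si;

        if (sysinfo(&si) != 0)
                return perror("sysinfo"), 1;
        unsigned long long half = (unsigned long long)si.totalram * si.mem_unit / 2;
        printf("default tmpfs size here: about %llu MiB\n", half >> 20);
        return 0;
}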
.parse_monolithic = shmem_parse !! 1544 if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes)) 4753 .parse_param = shmem_parse !! 1545 return NULL; 4754 .reconfigure = shmem_recon << 4755 #endif 1546 #endif 4756 }; << 4757 << 4758 static struct kmem_cache *shmem_inode_cachep << 4759 1547 4760 static struct inode *shmem_alloc_inode(struct !! 1548 spin_lock_init(&sbinfo->stat_lock); 4761 { !! 1549 sbinfo->max_blocks = blocks; 4762 struct shmem_inode_info *info; !! 1550 sbinfo->free_blocks = blocks; 4763 info = alloc_inode_sb(sb, shmem_inode !! 1551 sbinfo->max_inodes = inodes; 4764 if (!info) !! 1552 sbinfo->free_inodes = inodes; >> 1553 sb->s_maxbytes = SHMEM_MAX_BYTES; >> 1554 sb->s_blocksize = PAGE_CACHE_SIZE; >> 1555 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; >> 1556 sb->s_magic = TMPFS_MAGIC; >> 1557 sb->s_op = &shmem_ops; >> 1558 inode = shmem_get_inode(sb, S_IFDIR | mode, 0); >> 1559 if (!inode) 4765 return NULL; 1560 return NULL; 4766 return &info->vfs_inode; << 4767 } << 4768 << 4769 static void shmem_free_in_core_inode(struct i << 4770 { << 4771 if (S_ISLNK(inode->i_mode)) << 4772 kfree(inode->i_link); << 4773 kmem_cache_free(shmem_inode_cachep, S << 4774 } << 4775 << 4776 static void shmem_destroy_inode(struct inode << 4777 { << 4778 if (S_ISREG(inode->i_mode)) << 4779 mpol_free_shared_policy(&SHME << 4780 if (S_ISDIR(inode->i_mode)) << 4781 simple_offset_destroy(shmem_g << 4782 } << 4783 << 4784 static void shmem_init_inode(void *foo) << 4785 { << 4786 struct shmem_inode_info *info = foo; << 4787 inode_init_once(&info->vfs_inode); << 4788 } << 4789 << 4790 static void __init shmem_init_inodecache(void << 4791 { << 4792 shmem_inode_cachep = kmem_cache_creat << 4793 sizeof(struct << 4794 0, SLAB_PANIC << 4795 } << 4796 1561 4797 static void __init shmem_destroy_inodecache(v !! 1562 inode->i_uid = uid; 4798 { !! 1563 inode->i_gid = gid; 4799 kmem_cache_destroy(shmem_inode_cachep !! 1564 root = d_alloc_root(inode); 4800 } !! 1565 if (!root) { 4801 !! 1566 iput(inode); 4802 /* Keep the page in page cache instead of tru !! 1567 return NULL; 4803 static int shmem_error_remove_folio(struct ad !! 1568 } 4804 struct fol !! 1569 sb->s_root = root; 4805 { !! 1570 return sb; 4806 return 0; << 4807 } 1571 } 4808 1572 4809 static const struct address_space_operations !! 1573 static struct address_space_operations shmem_aops = { 4810 .writepage = shmem_writepage, !! 1574 removepage: shmem_removepage, 4811 .dirty_folio = noop_dirty_folio, !! 1575 writepage: shmem_writepage, 4812 #ifdef CONFIG_TMPFS 1576 #ifdef CONFIG_TMPFS 4813 .write_begin = shmem_write_begin, !! 1577 readpage: shmem_readpage, 4814 .write_end = shmem_write_end, !! 1578 prepare_write: shmem_prepare_write, 4815 #endif !! 1579 commit_write: shmem_commit_write, 4816 #ifdef CONFIG_MIGRATION << 4817 .migrate_folio = migrate_folio, << 4818 #endif 1580 #endif 4819 .error_remove_folio = shmem_error_rem << 4820 }; 1581 }; 4821 1582 4822 static const struct file_operations shmem_fil !! 1583 static struct file_operations shmem_file_operations = { 4823 .mmap = shmem_mmap, !! 1584 mmap: shmem_mmap, 4824 .open = shmem_file_open, << 4825 .get_unmapped_area = shmem_get_unmapp << 4826 #ifdef CONFIG_TMPFS 1585 #ifdef CONFIG_TMPFS 4827 .llseek = shmem_file_llseek, !! 1586 read: shmem_file_read, 4828 .read_iter = shmem_file_read_ite !! 1587 write: shmem_file_write, 4829 .write_iter = shmem_file_write_it !! 
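/*
 * Editor's sketch (illustration only, not part of shmem.c): .mmap above is
 * shmem_mmap(), so a MAP_SHARED mapping and read()/write() hit the very
 * same page cache pages -- that is what makes tmpfs usable as shared
 * memory.  /dev/shm is an assumed tmpfs mount point.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/dev/shm/mmap-demo";
        char back[64] = "";
        int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0600);

        if (fd < 0 || ftruncate(fd, 4096) < 0)
                return perror("setup"), 1;
        char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                return perror("mmap"), 1;
        strcpy(p, "stored through the mapping");
        pread(fd, back, sizeof(back) - 1, 0);
        printf("read() sees: %s\n", back);
        munmap(p, 4096);
        unlink(path);
        return 0;
}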
1588 fsync: shmem_sync_file, 4830 .fsync = noop_fsync, << 4831 .splice_read = shmem_file_splice_r << 4832 .splice_write = iter_file_splice_wr << 4833 .fallocate = shmem_fallocate, << 4834 #endif 1589 #endif 4835 }; 1590 }; 4836 1591 4837 static const struct inode_operations shmem_in !! 1592 static struct inode_operations shmem_inode_operations = { 4838 .getattr = shmem_getattr, !! 1593 truncate: shmem_truncate, 4839 .setattr = shmem_setattr, !! 1594 setattr: shmem_notify_change, 4840 #ifdef CONFIG_TMPFS_XATTR << 4841 .listxattr = shmem_listxattr, << 4842 .set_acl = simple_set_acl, << 4843 .fileattr_get = shmem_fileattr_get, << 4844 .fileattr_set = shmem_fileattr_set, << 4845 #endif << 4846 }; 1595 }; 4847 1596 4848 static const struct inode_operations shmem_di !! 1597 static struct inode_operations shmem_dir_inode_operations = { 4849 #ifdef CONFIG_TMPFS 1598 #ifdef CONFIG_TMPFS 4850 .getattr = shmem_getattr, !! 1599 create: shmem_create, 4851 .create = shmem_create, !! 1600 lookup: shmem_lookup, 4852 .lookup = simple_lookup, !! 1601 link: shmem_link, 4853 .link = shmem_link, !! 1602 unlink: shmem_unlink, 4854 .unlink = shmem_unlink, !! 1603 symlink: shmem_symlink, 4855 .symlink = shmem_symlink, !! 1604 mkdir: shmem_mkdir, 4856 .mkdir = shmem_mkdir, !! 1605 rmdir: shmem_rmdir, 4857 .rmdir = shmem_rmdir, !! 1606 mknod: shmem_mknod, 4858 .mknod = shmem_mknod, !! 1607 rename: shmem_rename, 4859 .rename = shmem_rename2, << 4860 .tmpfile = shmem_tmpfile, << 4861 .get_offset_ctx = shmem_get_offset_ct << 4862 #endif << 4863 #ifdef CONFIG_TMPFS_XATTR << 4864 .listxattr = shmem_listxattr, << 4865 .fileattr_get = shmem_fileattr_get, << 4866 .fileattr_set = shmem_fileattr_set, << 4867 #endif << 4868 #ifdef CONFIG_TMPFS_POSIX_ACL << 4869 .setattr = shmem_setattr, << 4870 .set_acl = simple_set_acl, << 4871 #endif 1608 #endif 4872 }; 1609 }; 4873 1610 4874 static const struct inode_operations shmem_sp !! 1611 static struct super_operations shmem_ops = { 4875 .getattr = shmem_getattr, << 4876 #ifdef CONFIG_TMPFS_XATTR << 4877 .listxattr = shmem_listxattr, << 4878 #endif << 4879 #ifdef CONFIG_TMPFS_POSIX_ACL << 4880 .setattr = shmem_setattr, << 4881 .set_acl = simple_set_acl, << 4882 #endif << 4883 }; << 4884 << 4885 static const struct super_operations shmem_op << 4886 .alloc_inode = shmem_alloc_inode, << 4887 .free_inode = shmem_free_in_core_ << 4888 .destroy_inode = shmem_destroy_inode << 4889 #ifdef CONFIG_TMPFS 1612 #ifdef CONFIG_TMPFS 4890 .statfs = shmem_statfs, !! 1613 statfs: shmem_statfs, 4891 .show_options = shmem_show_options, !! 1614 remount_fs: shmem_remount_fs, 4892 #endif << 4893 #ifdef CONFIG_TMPFS_QUOTA << 4894 .get_dquots = shmem_get_dquots, << 4895 #endif << 4896 .evict_inode = shmem_evict_inode, << 4897 .drop_inode = generic_delete_inod << 4898 .put_super = shmem_put_super, << 4899 #ifdef CONFIG_TRANSPARENT_HUGEPAGE << 4900 .nr_cached_objects = shmem_unuse << 4901 .free_cached_objects = shmem_unuse << 4902 #endif 1615 #endif >> 1616 delete_inode: shmem_delete_inode, >> 1617 put_inode: force_delete, 4903 }; 1618 }; 4904 1619 4905 static const struct vm_operations_struct shme !! 1620 static struct vm_operations_struct shmem_vm_ops = { 4906 .fault = shmem_fault, !! 
1621 nopage: shmem_nopage, 4907 .map_pages = filemap_map_pages, << 4908 #ifdef CONFIG_NUMA << 4909 .set_policy = shmem_set_policy, << 4910 .get_policy = shmem_get_policy, << 4911 #endif << 4912 }; 1622 }; 4913 1623 4914 static const struct vm_operations_struct shme << 4915 .fault = shmem_fault, << 4916 .map_pages = filemap_map_pages, << 4917 #ifdef CONFIG_NUMA << 4918 .set_policy = shmem_set_policy, << 4919 .get_policy = shmem_get_policy, << 4920 #endif << 4921 }; << 4922 << 4923 int shmem_init_fs_context(struct fs_context * << 4924 { << 4925 struct shmem_options *ctx; << 4926 << 4927 ctx = kzalloc(sizeof(struct shmem_opt << 4928 if (!ctx) << 4929 return -ENOMEM; << 4930 << 4931 ctx->mode = 0777 | S_ISVTX; << 4932 ctx->uid = current_fsuid(); << 4933 ctx->gid = current_fsgid(); << 4934 << 4935 fc->fs_private = ctx; << 4936 fc->ops = &shmem_fs_context_ops; << 4937 return 0; << 4938 } << 4939 << 4940 static struct file_system_type shmem_fs_type << 4941 .owner = THIS_MODULE, << 4942 .name = "tmpfs", << 4943 .init_fs_context = shmem_init_fs_cont << 4944 #ifdef CONFIG_TMPFS 1624 #ifdef CONFIG_TMPFS 4945 .parameters = shmem_fs_parameters !! 1625 /* type "shm" will be tagged obsolete in 2.5 */ >> 1626 static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER); >> 1627 static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER); >> 1628 #else >> 1629 static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT); 4946 #endif 1630 #endif 4947 .kill_sb = kill_litter_super, !! 1631 static struct vfsmount *shm_mnt; 4948 .fs_flags = FS_USERNS_MOUNT | F << 4949 }; << 4950 1632 4951 void __init shmem_init(void) !! 1633 static int __init init_tmpfs(void) 4952 { 1634 { 4953 int error; 1635 int error; 4954 1636 4955 shmem_init_inodecache(); !! 1637 error = register_filesystem(&tmpfs_fs_type); 4956 !! 1638 if (error) { 4957 #ifdef CONFIG_TMPFS_QUOTA !! 1639 printk(KERN_ERR "Could not register tmpfs\n"); 4958 register_quota_format(&shmem_quota_fo !! 1640 goto out3; 4959 #endif !! 1641 } 4960 !! 1642 #ifdef CONFIG_TMPFS 4961 error = register_filesystem(&shmem_fs 1643 error = register_filesystem(&shmem_fs_type); 4962 if (error) { 1644 if (error) { 4963 pr_err("Could not register tm !! 1645 printk(KERN_ERR "Could not register shm fs\n"); 4964 goto out2; 1646 goto out2; 4965 } 1647 } 4966 !! 1648 devfs_mk_dir(NULL, "shm", NULL); 4967 shm_mnt = kern_mount(&shmem_fs_type); !! 1649 #endif >> 1650 shm_mnt = kern_mount(&tmpfs_fs_type); 4968 if (IS_ERR(shm_mnt)) { 1651 if (IS_ERR(shm_mnt)) { 4969 error = PTR_ERR(shm_mnt); 1652 error = PTR_ERR(shm_mnt); 4970 pr_err("Could not kern_mount !! 1653 printk(KERN_ERR "Could not kern_mount tmpfs\n"); 4971 goto out1; 1654 goto out1; 4972 } 1655 } 4973 1656 4974 #ifdef CONFIG_TRANSPARENT_HUGEPAGE !! 1657 /* The internal instance should not do size checking */ 4975 if (has_transparent_hugepage() && shm !! 1658 shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX); 4976 SHMEM_SB(shm_mnt->mnt_sb)->hu !! 1659 return 0; 4977 else << 4978 shmem_huge = SHMEM_HUGE_NEVER << 4979 << 4980 /* << 4981 * Default to setting PMD-sized THP t << 4982 * disable all other multi-size THPs. 
<< 4983 */ << 4984 huge_shmem_orders_inherit = BIT(HPAGE << 4985 #endif << 4986 return; << 4987 1660 4988 out1: 1661 out1: >> 1662 #ifdef CONFIG_TMPFS 4989 unregister_filesystem(&shmem_fs_type) 1663 unregister_filesystem(&shmem_fs_type); 4990 out2: 1664 out2: 4991 #ifdef CONFIG_TMPFS_QUOTA << 4992 unregister_quota_format(&shmem_quota_ << 4993 #endif 1665 #endif 4994 shmem_destroy_inodecache(); !! 1666 unregister_filesystem(&tmpfs_fs_type); >> 1667 out3: 4995 shm_mnt = ERR_PTR(error); 1668 shm_mnt = ERR_PTR(error); >> 1669 return error; 4996 } 1670 } 4997 !! 1671 module_init(init_tmpfs) 4998 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && d << 4999 static ssize_t shmem_enabled_show(struct kobj << 5000 struct kobj << 5001 { << 5002 static const int values[] = { << 5003 SHMEM_HUGE_ALWAYS, << 5004 SHMEM_HUGE_WITHIN_SIZE, << 5005 SHMEM_HUGE_ADVISE, << 5006 SHMEM_HUGE_NEVER, << 5007 SHMEM_HUGE_DENY, << 5008 SHMEM_HUGE_FORCE, << 5009 }; << 5010 int len = 0; << 5011 int i; << 5012 << 5013 for (i = 0; i < ARRAY_SIZE(values); i << 5014 len += sysfs_emit_at(buf, len << 5015 shmem_huge == << 5016 i ? " " : "", << 5017 } << 5018 len += sysfs_emit_at(buf, len, "\n"); << 5019 << 5020 return len; << 5021 } << 5022 << 5023 static ssize_t shmem_enabled_store(struct kob << 5024 struct kobj_attribute *attr, << 5025 { << 5026 char tmp[16]; << 5027 int huge; << 5028 << 5029 if (count + 1 > sizeof(tmp)) << 5030 return -EINVAL; << 5031 memcpy(tmp, buf, count); << 5032 tmp[count] = '\0'; << 5033 if (count && tmp[count - 1] == '\n') << 5034 tmp[count - 1] = '\0'; << 5035 << 5036 huge = shmem_parse_huge(tmp); << 5037 if (huge == -EINVAL) << 5038 return -EINVAL; << 5039 if (!has_transparent_hugepage() && << 5040 huge != SHMEM_HUGE_NE << 5041 return -EINVAL; << 5042 << 5043 /* Do not override huge allocation po << 5044 if (huge == SHMEM_HUGE_FORCE && << 5045 huge_shmem_orders_inherit != BIT( << 5046 return -EINVAL; << 5047 << 5048 shmem_huge = huge; << 5049 if (shmem_huge > SHMEM_HUGE_DENY) << 5050 SHMEM_SB(shm_mnt->mnt_sb)->hu << 5051 return count; << 5052 } << 5053 << 5054 struct kobj_attribute shmem_enabled_attr = __ << 5055 static DEFINE_SPINLOCK(huge_shmem_orders_lock << 5056 << 5057 static ssize_t thpsize_shmem_enabled_show(str << 5058 str << 5059 { << 5060 int order = to_thpsize(kobj)->order; << 5061 const char *output; << 5062 << 5063 if (test_bit(order, &huge_shmem_order << 5064 output = "[always] inherit wi << 5065 else if (test_bit(order, &huge_shmem_ << 5066 output = "always [inherit] wi << 5067 else if (test_bit(order, &huge_shmem_ << 5068 output = "always inherit [wit << 5069 else if (test_bit(order, &huge_shmem_ << 5070 output = "always inherit with << 5071 else << 5072 output = "always inherit with << 5073 << 5074 return sysfs_emit(buf, "%s\n", output << 5075 } << 5076 << 5077 static ssize_t thpsize_shmem_enabled_store(st << 5078 st << 5079 co << 5080 { << 5081 int order = to_thpsize(kobj)->order; << 5082 ssize_t ret = count; << 5083 << 5084 if (sysfs_streq(buf, "always")) { << 5085 spin_lock(&huge_shmem_orders_ << 5086 clear_bit(order, &huge_shmem_ << 5087 clear_bit(order, &huge_shmem_ << 5088 clear_bit(order, &huge_shmem_ << 5089 set_bit(order, &huge_shmem_or << 5090 spin_unlock(&huge_shmem_order << 5091 } else if (sysfs_streq(buf, "inherit" << 5092 /* Do not override huge alloc << 5093 if (shmem_huge == SHMEM_HUGE_ << 5094 order != HPAGE_PMD_ORDER) << 5095 return -EINVAL; << 5096 << 5097 spin_lock(&huge_shmem_orders_ << 5098 clear_bit(order, &huge_shmem_ << 5099 clear_bit(order, &huge_shmem_ << 5100 
clear_bit(order, &huge_shmem_ << 5101 set_bit(order, &huge_shmem_or << 5102 spin_unlock(&huge_shmem_order << 5103 } else if (sysfs_streq(buf, "within_s << 5104 spin_lock(&huge_shmem_orders_ << 5105 clear_bit(order, &huge_shmem_ << 5106 clear_bit(order, &huge_shmem_ << 5107 clear_bit(order, &huge_shmem_ << 5108 set_bit(order, &huge_shmem_or << 5109 spin_unlock(&huge_shmem_order << 5110 } else if (sysfs_streq(buf, "advise") << 5111 spin_lock(&huge_shmem_orders_ << 5112 clear_bit(order, &huge_shmem_ << 5113 clear_bit(order, &huge_shmem_ << 5114 clear_bit(order, &huge_shmem_ << 5115 set_bit(order, &huge_shmem_or << 5116 spin_unlock(&huge_shmem_order << 5117 } else if (sysfs_streq(buf, "never")) << 5118 spin_lock(&huge_shmem_orders_ << 5119 clear_bit(order, &huge_shmem_ << 5120 clear_bit(order, &huge_shmem_ << 5121 clear_bit(order, &huge_shmem_ << 5122 clear_bit(order, &huge_shmem_ << 5123 spin_unlock(&huge_shmem_order << 5124 } else { << 5125 ret = -EINVAL; << 5126 } << 5127 << 5128 return ret; << 5129 } << 5130 << 5131 struct kobj_attribute thpsize_shmem_enabled_a << 5132 __ATTR(shmem_enabled, 0644, thpsize_s << 5133 #endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONF << 5134 << 5135 #else /* !CONFIG_SHMEM */ << 5136 1672 5137 /* 1673 /* 5138 * tiny-shmem: simple shmemfs and tmpfs using !! 1674 * shmem_file_setup - get an unlinked file living in tmpfs >> 1675 * >> 1676 * @name: name for dentry (to be seen in /proc/<pid>/maps >> 1677 * @size: size to be set for the file 5139 * 1678 * 5140 * This is intended for small system where th << 5141 * shmem code (swap-backed and resource-limit << 5142 * their complexity. On systems without swap << 5143 * effectively equivalent, but much lighter w << 5144 */ 1679 */ 5145 !! 1680 struct file *shmem_file_setup(char *name, loff_t size) 5146 static struct file_system_type shmem_fs_type << 5147 .name = "tmpfs", << 5148 .init_fs_context = ramfs_init_fs_cont << 5149 .parameters = ramfs_fs_parameters << 5150 .kill_sb = ramfs_kill_sb, << 5151 .fs_flags = FS_USERNS_MOUNT, << 5152 }; << 5153 << 5154 void __init shmem_init(void) << 5155 { << 5156 BUG_ON(register_filesystem(&shmem_fs_ << 5157 << 5158 shm_mnt = kern_mount(&shmem_fs_type); << 5159 BUG_ON(IS_ERR(shm_mnt)); << 5160 } << 5161 << 5162 int shmem_unuse(unsigned int type) << 5163 { << 5164 return 0; << 5165 } << 5166 << 5167 int shmem_lock(struct file *file, int lock, s << 5168 { << 5169 return 0; << 5170 } << 5171 << 5172 void shmem_unlock_mapping(struct address_spac << 5173 { << 5174 } << 5175 << 5176 #ifdef CONFIG_MMU << 5177 unsigned long shmem_get_unmapped_area(struct << 5178 unsigne << 5179 unsigne << 5180 { << 5181 return mm_get_unmapped_area(current-> << 5182 } << 5183 #endif << 5184 << 5185 void shmem_truncate_range(struct inode *inode << 5186 { << 5187 truncate_inode_pages_range(inode->i_m << 5188 } << 5189 EXPORT_SYMBOL_GPL(shmem_truncate_range); << 5190 << 5191 #define shmem_vm_ops << 5192 #define shmem_anon_vm_ops << 5193 #define shmem_file_operations << 5194 #define shmem_acct_size(flags, size) << 5195 #define shmem_unacct_size(flags, size) << 5196 << 5197 static inline struct inode *shmem_get_inode(s << 5198 struct super_ << 5199 umode_t mode, << 5200 { << 5201 struct inode *inode = ramfs_get_inode << 5202 return inode ? 
inode : ERR_PTR(-ENOSP << 5203 } << 5204 << 5205 #endif /* CONFIG_SHMEM */ << 5206 << 5207 /* common code */ << 5208 << 5209 static struct file *__shmem_file_setup(struct << 5210 loff_t size, unsigned << 5211 { 1681 { >> 1682 int error; >> 1683 struct file *file; 5212 struct inode *inode; 1684 struct inode *inode; 5213 struct file *res; !! 1685 struct dentry *dentry, *root; >> 1686 struct qstr this; >> 1687 int vm_enough_memory(long pages); 5214 1688 5215 if (IS_ERR(mnt)) !! 1689 if (IS_ERR(shm_mnt)) 5216 return ERR_CAST(mnt); !! 1690 return (void *)shm_mnt; 5217 1691 5218 if (size < 0 || size > MAX_LFS_FILESI !! 1692 if (size > SHMEM_MAX_BYTES) 5219 return ERR_PTR(-EINVAL); 1693 return ERR_PTR(-EINVAL); 5220 1694 5221 if (shmem_acct_size(flags, size)) !! 1695 if (!vm_enough_memory(VM_ACCT(size))) 5222 return ERR_PTR(-ENOMEM); 1696 return ERR_PTR(-ENOMEM); 5223 1697 5224 if (is_idmapped_mnt(mnt)) !! 1698 this.name = name; 5225 return ERR_PTR(-EINVAL); !! 1699 this.len = strlen(name); 5226 !! 1700 this.hash = 0; /* will go */ 5227 inode = shmem_get_inode(&nop_mnt_idma !! 1701 root = shm_mnt->mnt_root; 5228 S_IFREG | S_I !! 1702 dentry = d_alloc(root, &this); 5229 if (IS_ERR(inode)) { !! 1703 if (!dentry) 5230 shmem_unacct_size(flags, size !! 1704 return ERR_PTR(-ENOMEM); 5231 return ERR_CAST(inode); << 5232 } << 5233 inode->i_flags |= i_flags; << 5234 inode->i_size = size; << 5235 clear_nlink(inode); /* It is unli << 5236 res = ERR_PTR(ramfs_nommu_expand_for_ << 5237 if (!IS_ERR(res)) << 5238 res = alloc_file_pseudo(inode << 5239 &shmem_file_o << 5240 if (IS_ERR(res)) << 5241 iput(inode); << 5242 return res; << 5243 } << 5244 1705 5245 /** !! 1706 error = -ENFILE; 5246 * shmem_kernel_file_setup - get an unlinked !! 1707 file = get_empty_filp(); 5247 * kernel internal. There will be NO LS !! 1708 if (!file) 5248 * underlying inode. So users of this i !! 1709 goto put_dentry; 5249 * higher layer. The users are the big_ !! 1710 5250 * checks are provided at the key or shm !! 1711 error = -ENOSPC; 5251 * @name: name for dentry (to be seen in /pro !! 1712 inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); 5252 * @size: size to be set for the file !! 1713 if (!inode) 5253 * @flags: VM_NORESERVE suppresses pre-accoun !! 1714 goto close_file; 5254 */ << 5255 struct file *shmem_kernel_file_setup(const ch << 5256 { << 5257 return __shmem_file_setup(shm_mnt, na << 5258 } << 5259 EXPORT_SYMBOL_GPL(shmem_kernel_file_setup); << 5260 1715 5261 /** !! 1716 d_instantiate(dentry, inode); 5262 * shmem_file_setup - get an unlinked file li !! 1717 inode->i_size = size; 5263 * @name: name for dentry (to be seen in /pro !! 1718 inode->i_nlink = 0; /* It is unlinked */ 5264 * @size: size to be set for the file !! 1719 file->f_vfsmnt = mntget(shm_mnt); 5265 * @flags: VM_NORESERVE suppresses pre-accoun !! 1720 file->f_dentry = dentry; 5266 */ !! 1721 file->f_op = &shmem_file_operations; 5267 struct file *shmem_file_setup(const char *nam !! 1722 file->f_mode = FMODE_WRITE | FMODE_READ; 5268 { !! 1723 return file; 5269 return __shmem_file_setup(shm_mnt, na !! 1724 5270 } !! 1725 close_file: 5271 EXPORT_SYMBOL_GPL(shmem_file_setup); !! 1726 put_filp(file); 5272 !! 1727 put_dentry: 5273 /** !! 1728 dput(dentry); 5274 * shmem_file_setup_with_mnt - get an unlinke !! 
1729 return ERR_PTR(error); 5275 * @mnt: the tmpfs mount where the file will << 5276 * @name: name for dentry (to be seen in /pro << 5277 * @size: size to be set for the file << 5278 * @flags: VM_NORESERVE suppresses pre-accoun << 5279 */ << 5280 struct file *shmem_file_setup_with_mnt(struct << 5281 loff_t << 5282 { << 5283 return __shmem_file_setup(mnt, name, << 5284 } 1730 } 5285 EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt); << 5286 1731 5287 /** !! 1732 /* 5288 * shmem_zero_setup - setup a shared anonymou 1733 * shmem_zero_setup - setup a shared anonymous mapping 5289 * @vma: the vma to be mmapped is prepared by !! 1734 * >> 1735 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff 5290 */ 1736 */ 5291 int shmem_zero_setup(struct vm_area_struct *v 1737 int shmem_zero_setup(struct vm_area_struct *vma) 5292 { 1738 { 5293 struct file *file; 1739 struct file *file; 5294 loff_t size = vma->vm_end - vma->vm_s 1740 loff_t size = vma->vm_end - vma->vm_start; 5295 1741 5296 /* !! 1742 file = shmem_file_setup("dev/zero", size); 5297 * Cloning a new file under mmap_lock << 5298 * between XFS directory reading and << 5299 * accessible to the user through its << 5300 * bypass file security, in the same << 5301 */ << 5302 file = shmem_kernel_file_setup("dev/z << 5303 if (IS_ERR(file)) 1743 if (IS_ERR(file)) 5304 return PTR_ERR(file); 1744 return PTR_ERR(file); 5305 1745 5306 if (vma->vm_file) 1746 if (vma->vm_file) 5307 fput(vma->vm_file); 1747 fput(vma->vm_file); 5308 vma->vm_file = file; 1748 vma->vm_file = file; 5309 vma->vm_ops = &shmem_anon_vm_ops; !! 1749 vma->vm_ops = &shmem_vm_ops; 5310 << 5311 return 0; 1750 return 0; 5312 } 1751 } 5313 1752 5314 /** !! 1753 EXPORT_SYMBOL(shmem_file_setup); 5315 * shmem_read_folio_gfp - read into page cach << 5316 * @mapping: the folio's address_space << 5317 * @index: the folio index << 5318 * @gfp: the page allocator flags to u << 5319 * << 5320 * This behaves as a tmpfs "read_cache_page_g << 5321 * with any new page allocations done using t << 5322 * But read_cache_page_gfp() uses the ->read_ << 5323 * suit tmpfs, since it may have pages in swa << 5324 * for itself; although drivers/gpu/drm i915 << 5325 * << 5326 * i915_gem_object_get_pages_gtt() mixes __GF << 5327 * with the mapping_gfp_mask(), to avoid OOMi << 5328 */ << 5329 struct folio *shmem_read_folio_gfp(struct add << 5330 pgoff_t index, gfp_t gfp) << 5331 { << 5332 #ifdef CONFIG_SHMEM << 5333 struct inode *inode = mapping->host; << 5334 struct folio *folio; << 5335 int error; << 5336 << 5337 error = shmem_get_folio_gfp(inode, in << 5338 gfp, NULL << 5339 if (error) << 5340 return ERR_PTR(error); << 5341 << 5342 folio_unlock(folio); << 5343 return folio; << 5344 #else << 5345 /* << 5346 * The tiny !SHMEM case uses ramfs wi << 5347 */ << 5348 return mapping_read_folio_gfp(mapping << 5349 #endif << 5350 } << 5351 EXPORT_SYMBOL_GPL(shmem_read_folio_gfp); << 5352 << 5353 struct page *shmem_read_mapping_page_gfp(stru << 5354 pgof << 5355 { << 5356 struct folio *folio = shmem_read_foli << 5357 struct page *page; << 5358 << 5359 if (IS_ERR(folio)) << 5360 return &folio->page; << 5361 << 5362 page = folio_file_page(folio, index); << 5363 if (PageHWPoison(page)) { << 5364 folio_put(folio); << 5365 return ERR_PTR(-EIO); << 5366 } << 5367 << 5368 return page; << 5369 } << 5370 EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp << 5371 1754
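/*
 * Usage sketch (not part of shmem.c; a hedged illustration only): how a
 * driver might combine the exported helpers documented above --
 * shmem_file_setup() and shmem_read_mapping_page_gfp() -- to back a
 * pageable buffer with an unlinked tmpfs file, in the spirit of the
 * drm_gem/i915 usage mentioned in the shmem_read_folio_gfp() comment.
 * The identifiers introduced here (demo_shmem_buffer_touch, the buffer
 * name, etc.) are hypothetical, and error handling is kept minimal.
 */
#include <linux/err.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/shmem_fs.h>
#include <linux/string.h>

static int demo_shmem_buffer_touch(loff_t size)
{
	struct file *filp;
	struct address_space *mapping;
	struct page *page;
	void *kaddr;

	/* Unlinked tmpfs file; VM_NORESERVE skips up-front accounting. */
	filp = shmem_file_setup("demo-shmem-buf", size, VM_NORESERVE);
	if (IS_ERR(filp))
		return PTR_ERR(filp);

	mapping = filp->f_mapping;

	/*
	 * Read in (or allocate) page 0 of the buffer.  Unlike
	 * read_cache_page_gfp(), this path also finds pages that tmpfs
	 * has pushed out to swap, as the comment above explains.
	 */
	page = shmem_read_mapping_page_gfp(mapping, 0,
					   mapping_gfp_mask(mapping));
	if (IS_ERR(page)) {
		fput(filp);
		return PTR_ERR(page);
	}

	kaddr = kmap_local_page(page);
	memset(kaddr, 0, PAGE_SIZE);		/* use the buffer */
	kunmap_local(kaddr);

	set_page_dirty(page);	/* keep the data; tmpfs may swap it later */
	put_page(page);

	fput(filp);		/* last reference: the unlinked file is freed */
	return 0;
}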