// SPDX-License-Identifier: GPL-2.0-only
/*
 * (C) 1997 Linus Torvalds
 * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
 */
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
#include <linux/cdev.h>
#include <linux/memblock.h>
#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/posix_acl.h>
#include <linux/buffer_head.h> /* for inode_has_buffers */
#include <linux/ratelimit.h>
#include <linux/list_lru.h>
#include <linux/iversion.h>
#include <linux/rw_hint.h>
#include <trace/events/writeback.h>
#include "internal.h"

/*
 * Inode locking rules:
 *
 * inode->i_lock protects:
 *   inode->i_state, inode->i_hash, __iget(), inode->i_io_list
 * Inode LRU list locks protect:
 *   inode->i_sb->s_inode_lru, inode->i_lru
 * inode->i_sb->s_inode_list_lock protects:
 *   inode->i_sb->s_inodes, inode->i_sb_list
 * bdi->wb.list_lock protects:
 *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
 * inode_hash_lock protects:
 *   inode_hashtable, inode->i_hash
 *
 * Lock ordering:
 *
 * inode->i_sb->s_inode_list_lock
 *   inode->i_lock
 *     Inode LRU list locks
 *
 * bdi->wb.list_lock
 *   inode->i_lock
 *
 * inode_hash_lock
 *   inode->i_sb->s_inode_list_lock
 *   inode->i_lock
 *
 * iunique_lock
 *   inode_hash_lock
 */
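
/*
 * Illustrative sketch (not part of the original file): a scan that follows
 * the ordering documented above -- s_inode_list_lock first, then each
 * inode's i_lock. Taking them in the opposite order would deadlock against
 * the evict_inodes()/invalidate_inodes() walkers below, which nest i_lock
 * inside s_inode_list_lock:
 *
 *	spin_lock(&sb->s_inode_list_lock);
 *	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 *		spin_lock(&inode->i_lock);
 *		... inspect inode->i_state ...
 *		spin_unlock(&inode->i_lock);
 *	}
 *	spin_unlock(&sb->s_inode_list_lock);
 */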

static unsigned int i_hash_mask __ro_after_init;
static unsigned int i_hash_shift __ro_after_init;
static struct hlist_head *inode_hashtable __ro_after_init;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);

/*
 * Empty aops. Can be used for the cases where the user does not
 * define any of the address_space operations.
 */
const struct address_space_operations empty_aops = {
};
EXPORT_SYMBOL(empty_aops);

static DEFINE_PER_CPU(unsigned long, nr_inodes);
static DEFINE_PER_CPU(unsigned long, nr_unused);

static struct kmem_cache *inode_cachep __ro_after_init;

static long get_nr_inodes(void)
{
	int i;
	long sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_inodes, i);
	return sum < 0 ? 0 : sum;
}

static inline long get_nr_inodes_unused(void)
{
	int i;
	long sum = 0;
	for_each_possible_cpu(i)
		sum += per_cpu(nr_unused, i);
	return sum < 0 ? 0 : sum;
}

long get_nr_dirty_inodes(void)
{
	/* not actually dirty inodes, but a wild approximation */
	long nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
	return nr_dirty > 0 ? nr_dirty : 0;
}

/*
 * Handle nr_inode sysctl
 */
#ifdef CONFIG_SYSCTL
/*
 * Statistics gathering..
 */
static struct inodes_stat_t inodes_stat;

static int proc_nr_inodes(const struct ctl_table *table, int write, void *buffer,
			  size_t *lenp, loff_t *ppos)
{
	inodes_stat.nr_inodes = get_nr_inodes();
	inodes_stat.nr_unused = get_nr_inodes_unused();
	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}

static struct ctl_table inodes_sysctls[] = {
	{
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		.maxlen		= 2*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{
		.procname	= "inode-state",
		.data		= &inodes_stat,
		.maxlen		= 7*sizeof(long),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
};

static int __init init_fs_inode_sysctls(void)
{
	register_sysctl_init("fs", inodes_sysctls);
	return 0;
}
early_initcall(init_fs_inode_sysctls);
#endif
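
/*
 * Example (illustrative, not part of this file): the read-only files
 * registered above are sampled from userspace as, e.g.,
 *
 *	$ cat /proc/sys/fs/inode-nr
 *	24161	9560
 *
 * i.e. nr_inodes followed by nr_unused. Both values are re-summed from the
 * per-cpu counters on every read by proc_nr_inodes(), so they are
 * approximate snapshots, not exact counts.
 */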

static int no_open(struct inode *inode, struct file *file)
{
	return -ENXIO;
}

/**
 * inode_init_always_gfp - perform inode structure initialisation
 * @sb: superblock inode belongs to
 * @inode: inode to initialise
 * @gfp: allocation flags
 *
 * These are initializations that need to be done on every inode
 * allocation as the fields are not initialised by slab allocation.
 * If there are additional allocations required @gfp is used.
 */
int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp)
{
	static const struct inode_operations empty_iops;
	static const struct file_operations no_open_fops = {.open = no_open};
	struct address_space *const mapping = &inode->i_data;

	inode->i_sb = sb;
	inode->i_blkbits = sb->s_blocksize_bits;
	inode->i_flags = 0;
	inode->i_state = 0;
	atomic64_set(&inode->i_sequence, 0);
	atomic_set(&inode->i_count, 1);
	inode->i_op = &empty_iops;
	inode->i_fop = &no_open_fops;
	inode->i_ino = 0;
	inode->__i_nlink = 1;
	inode->i_opflags = 0;
	if (sb->s_xattr)
		inode->i_opflags |= IOP_XATTR;
	i_uid_write(inode, 0);
	i_gid_write(inode, 0);
	atomic_set(&inode->i_writecount, 0);
	inode->i_size = 0;
	inode->i_write_hint = WRITE_LIFE_NOT_SET;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	inode->i_generation = 0;
	inode->i_pipe = NULL;
	inode->i_cdev = NULL;
	inode->i_link = NULL;
	inode->i_dir_seq = 0;
	inode->i_rdev = 0;
	inode->dirtied_when = 0;

#ifdef CONFIG_CGROUP_WRITEBACK
	inode->i_wb_frn_winner = 0;
	inode->i_wb_frn_avg_time = 0;
	inode->i_wb_frn_history = 0;
#endif

	spin_lock_init(&inode->i_lock);
	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

	init_rwsem(&inode->i_rwsem);
	lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);

	atomic_set(&inode->i_dio_count, 0);

	mapping->a_ops = &empty_aops;
	mapping->host = inode;
	mapping->flags = 0;
	mapping->wb_err = 0;
	atomic_set(&mapping->i_mmap_writable, 0);
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	atomic_set(&mapping->nr_thps, 0);
#endif
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
	mapping->i_private_data = NULL;
	mapping->writeback_index = 0;
	init_rwsem(&mapping->invalidate_lock);
	lockdep_set_class_and_name(&mapping->invalidate_lock,
				   &sb->s_type->invalidate_lock_key,
				   "mapping.invalidate_lock");
	if (sb->s_iflags & SB_I_STABLE_WRITES)
		mapping_set_stable_writes(mapping);
	inode->i_private = NULL;
	inode->i_mapping = mapping;
	INIT_HLIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
#endif

#ifdef CONFIG_FSNOTIFY
	inode->i_fsnotify_mask = 0;
#endif
	inode->i_flctx = NULL;

	if (unlikely(security_inode_alloc(inode, gfp)))
		return -ENOMEM;

	this_cpu_inc(nr_inodes);

	return 0;
}
EXPORT_SYMBOL(inode_init_always_gfp);
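
/*
 * Example (illustrative, not part of this file): a filesystem that recycles
 * its own inode objects re-runs this initialisation on each reuse via the
 * inode_init_always() wrapper (which supplies a default gfp mask); the
 * "myfs" names are hypothetical:
 *
 *	struct myfs_inode *mi = myfs_grab_recycled_inode(sb);
 *
 *	if (!mi || inode_init_always(sb, &mi->vfs_inode))
 *		return NULL;
 */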

void free_inode_nonrcu(struct inode *inode)
{
	kmem_cache_free(inode_cachep, inode);
}
EXPORT_SYMBOL(free_inode_nonrcu);

static void i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	if (inode->free_inode)
		inode->free_inode(inode);
	else
		free_inode_nonrcu(inode);
}

static struct inode *alloc_inode(struct super_block *sb)
{
	const struct super_operations *ops = sb->s_op;
	struct inode *inode;

	if (ops->alloc_inode)
		inode = ops->alloc_inode(sb);
	else
		inode = alloc_inode_sb(sb, inode_cachep, GFP_KERNEL);

	if (!inode)
		return NULL;

	if (unlikely(inode_init_always(sb, inode))) {
		if (ops->destroy_inode) {
			ops->destroy_inode(inode);
			if (!ops->free_inode)
				return NULL;
		}
		inode->free_inode = ops->free_inode;
		i_callback(&inode->i_rcu);
		return NULL;
	}

	return inode;
}

void __destroy_inode(struct inode *inode)
{
	BUG_ON(inode_has_buffers(inode));
	inode_detach_wb(inode);
	security_inode_free(inode);
	fsnotify_inode_delete(inode);
	locks_free_lock_context(inode);
	if (!inode->i_nlink) {
		WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

#ifdef CONFIG_FS_POSIX_ACL
	if (inode->i_acl && !is_uncached_acl(inode->i_acl))
		posix_acl_release(inode->i_acl);
	if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
		posix_acl_release(inode->i_default_acl);
#endif
	this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);

static void destroy_inode(struct inode *inode)
{
	const struct super_operations *ops = inode->i_sb->s_op;

	BUG_ON(!list_empty(&inode->i_lru));
	__destroy_inode(inode);
	if (ops->destroy_inode) {
		ops->destroy_inode(inode);
		if (!ops->free_inode)
			return;
	}
	inode->free_inode = ops->free_inode;
	call_rcu(&inode->i_rcu, i_callback);
}
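
/*
 * Example (illustrative, not part of this file): with the ->free_inode()
 * hook a filesystem frees its inode object RCU-safely without open-coding
 * call_rcu(); destroy_inode() above defers to i_callback() for that. The
 * "myfs" names are hypothetical:
 *
 *	static void myfs_free_inode(struct inode *inode)
 *	{
 *		kmem_cache_free(myfs_inode_cachep, MYFS_I(inode));
 *	}
 */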

/**
 * drop_nlink - directly drop an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  In cases
 * where we are attempting to track writes to the
 * filesystem, a decrement to zero means an imminent
 * write when the file is truncated and actually unlinked
 * on the filesystem.
 */
void drop_nlink(struct inode *inode)
{
	WARN_ON(inode->i_nlink == 0);
	inode->__i_nlink--;
	if (!inode->i_nlink)
		atomic_long_inc(&inode->i_sb->s_remove_count);
}
EXPORT_SYMBOL(drop_nlink);

/**
 * clear_nlink - directly zero an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  See
 * drop_nlink() for why we care about i_nlink hitting zero.
 */
void clear_nlink(struct inode *inode)
{
	if (inode->i_nlink) {
		inode->__i_nlink = 0;
		atomic_long_inc(&inode->i_sb->s_remove_count);
	}
}
EXPORT_SYMBOL(clear_nlink);

/**
 * set_nlink - directly set an inode's link count
 * @inode: inode
 * @nlink: new nlink (should be non-zero)
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.
 */
void set_nlink(struct inode *inode, unsigned int nlink)
{
	if (!nlink) {
		clear_nlink(inode);
	} else {
		/* Yes, some filesystems do change nlink from zero to one */
		if (inode->i_nlink == 0)
			atomic_long_dec(&inode->i_sb->s_remove_count);

		inode->__i_nlink = nlink;
	}
}
EXPORT_SYMBOL(set_nlink);

/**
 * inc_nlink - directly increment an inode's link count
 * @inode: inode
 *
 * This is a low-level filesystem helper to replace any
 * direct filesystem manipulation of i_nlink.  Currently,
 * it is only here for parity with dec_nlink().
 */
void inc_nlink(struct inode *inode)
{
	if (unlikely(inode->i_nlink == 0)) {
		WARN_ON(!(inode->i_state & I_LINKABLE));
		atomic_long_dec(&inode->i_sb->s_remove_count);
	}

	inode->__i_nlink++;
}
EXPORT_SYMBOL(inc_nlink);
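
/*
 * Example (illustrative, not part of this file): a typical ->unlink()
 * pairs a ctime update with drop_nlink() on the victim inode; "myfs" is
 * hypothetical:
 *
 *	static int myfs_unlink(struct inode *dir, struct dentry *dentry)
 *	{
 *		struct inode *inode = d_inode(dentry);
 *
 *		inode_set_ctime_current(inode);
 *		drop_nlink(inode);
 *		mark_inode_dirty(inode);
 *		return 0;
 *	}
 */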

static void __address_space_init_once(struct address_space *mapping)
{
	xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
	init_rwsem(&mapping->i_mmap_rwsem);
	INIT_LIST_HEAD(&mapping->i_private_list);
	spin_lock_init(&mapping->i_private_lock);
	mapping->i_mmap = RB_ROOT_CACHED;
}

void address_space_init_once(struct address_space *mapping)
{
	memset(mapping, 0, sizeof(*mapping));
	__address_space_init_once(mapping);
}
EXPORT_SYMBOL(address_space_init_once);

/*
 * These are initializations that only need to be done
 * once, because the fields are idempotent across use
 * of the inode, so let the slab be aware of that.
 */
void inode_init_once(struct inode *inode)
{
	memset(inode, 0, sizeof(*inode));
	INIT_HLIST_NODE(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_devices);
	INIT_LIST_HEAD(&inode->i_io_list);
	INIT_LIST_HEAD(&inode->i_wb_list);
	INIT_LIST_HEAD(&inode->i_lru);
	INIT_LIST_HEAD(&inode->i_sb_list);
	__address_space_init_once(&inode->i_data);
	i_size_ordered_init(inode);
}
EXPORT_SYMBOL(inode_init_once);

static void init_once(void *foo)
{
	struct inode *inode = (struct inode *) foo;

	inode_init_once(inode);
}

/*
 * get additional reference to inode; caller must already hold one.
 */
void ihold(struct inode *inode)
{
	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
}
EXPORT_SYMBOL(ihold);

static void __inode_add_lru(struct inode *inode, bool rotate)
{
	if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE))
		return;
	if (atomic_read(&inode->i_count))
		return;
	if (!(inode->i_sb->s_flags & SB_ACTIVE))
		return;
	if (!mapping_shrinkable(&inode->i_data))
		return;

	if (list_lru_add_obj(&inode->i_sb->s_inode_lru, &inode->i_lru))
		this_cpu_inc(nr_unused);
	else if (rotate)
		inode->i_state |= I_REFERENCED;
}

struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
					    struct inode *inode, u32 bit)
{
	void *bit_address;

	bit_address = inode_state_wait_address(inode, bit);
	init_wait_var_entry(wqe, bit_address, 0);
	return __var_waitqueue(bit_address);
}
EXPORT_SYMBOL(inode_bit_waitqueue);

/*
 * Add inode to LRU if needed (inode is unused and clean).
 *
 * Needs inode->i_lock held.
 */
void inode_add_lru(struct inode *inode)
{
	__inode_add_lru(inode, false);
}

static void inode_lru_list_del(struct inode *inode)
{
	if (list_lru_del_obj(&inode->i_sb->s_inode_lru, &inode->i_lru))
		this_cpu_dec(nr_unused);
}

static void inode_pin_lru_isolating(struct inode *inode)
{
	lockdep_assert_held(&inode->i_lock);
	WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE));
	inode->i_state |= I_LRU_ISOLATING;
}

static void inode_unpin_lru_isolating(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
	inode->i_state &= ~I_LRU_ISOLATING;
	/* Called with inode->i_lock which ensures memory ordering. */
	inode_wake_up_bit(inode, __I_LRU_ISOLATING);
	spin_unlock(&inode->i_lock);
}

static void inode_wait_for_lru_isolating(struct inode *inode)
{
	struct wait_bit_queue_entry wqe;
	struct wait_queue_head *wq_head;

	lockdep_assert_held(&inode->i_lock);
	if (!(inode->i_state & I_LRU_ISOLATING))
		return;

	wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING);
	for (;;) {
		prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
		/*
		 * Checking I_LRU_ISOLATING with inode->i_lock guarantees
		 * memory ordering.
		 */
		if (!(inode->i_state & I_LRU_ISOLATING))
			break;
		spin_unlock(&inode->i_lock);
		schedule();
		spin_lock(&inode->i_lock);
	}
	finish_wait(wq_head, &wqe.wq_entry);
	WARN_ON(inode->i_state & I_LRU_ISOLATING);
}

/**
 * inode_sb_list_add - add inode to the superblock list of inodes
 * @inode: inode to add
 */
void inode_sb_list_add(struct inode *inode)
{
	spin_lock(&inode->i_sb->s_inode_list_lock);
	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
	spin_unlock(&inode->i_sb->s_inode_list_lock);
}
EXPORT_SYMBOL_GPL(inode_sb_list_add);

static inline void inode_sb_list_del(struct inode *inode)
{
	if (!list_empty(&inode->i_sb_list)) {
		spin_lock(&inode->i_sb->s_inode_list_lock);
		list_del_init(&inode->i_sb_list);
		spin_unlock(&inode->i_sb->s_inode_list_lock);
	}
}

static unsigned long hash(struct super_block *sb, unsigned long hashval)
{
	unsigned long tmp;

	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
			L1_CACHE_BYTES;
	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
	return tmp & i_hash_mask;
}

/**
 *	__insert_inode_hash - hash an inode
 *	@inode: unhashed inode
 *	@hashval: unsigned long value used to locate this object in the
 *		inode_hashtable.
 *
 *	Add an inode to the inode hash for this superblock.
 */
void __insert_inode_hash(struct inode *inode, unsigned long hashval)
{
	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);

	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	hlist_add_head_rcu(&inode->i_hash, b);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__insert_inode_hash);

/**
 *	__remove_inode_hash - remove an inode from the hash
 *	@inode: inode to unhash
 *
 *	Remove an inode from the superblock.
 */
void __remove_inode_hash(struct inode *inode)
{
	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	hlist_del_init_rcu(&inode->i_hash);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);
}
EXPORT_SYMBOL(__remove_inode_hash);
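
/*
 * Example (illustrative, not part of this file): a filesystem whose inode
 * identity is wider than i_ino (say a 64-bit object id truncated to fit)
 * picks the hash value itself:
 *
 *	__insert_inode_hash(inode, (unsigned long)objid);
 *	...
 *	remove_inode_hash(inode);	(at eviction, if not already unhashed)
 */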

void dump_mapping(const struct address_space *mapping)
{
	struct inode *host;
	const struct address_space_operations *a_ops;
	struct hlist_node *dentry_first;
	struct dentry *dentry_ptr;
	struct dentry dentry;
	char fname[64] = {};
	unsigned long ino;

	/*
	 * If mapping is an invalid pointer, we don't want to crash
	 * accessing it, so probe everything that depends on it carefully.
	 */
	if (get_kernel_nofault(host, &mapping->host) ||
	    get_kernel_nofault(a_ops, &mapping->a_ops)) {
		pr_warn("invalid mapping:%px\n", mapping);
		return;
	}

	if (!host) {
		pr_warn("aops:%ps\n", a_ops);
		return;
	}

	if (get_kernel_nofault(dentry_first, &host->i_dentry.first) ||
	    get_kernel_nofault(ino, &host->i_ino)) {
		pr_warn("aops:%ps invalid inode:%px\n", a_ops, host);
		return;
	}

	if (!dentry_first) {
		pr_warn("aops:%ps ino:%lx\n", a_ops, ino);
		return;
	}

	dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
	if (get_kernel_nofault(dentry, dentry_ptr) ||
	    !dentry.d_parent || !dentry.d_name.name) {
		pr_warn("aops:%ps ino:%lx invalid dentry:%px\n",
				a_ops, ino, dentry_ptr);
		return;
	}

	if (strncpy_from_kernel_nofault(fname, dentry.d_name.name, 63) < 0)
		strscpy(fname, "<invalid>");
	/*
	 * Even if strncpy_from_kernel_nofault() succeeded,
	 * the fname could be unreliable
	 */
	pr_warn("aops:%ps ino:%lx dentry name(?):\"%s\"\n",
		a_ops, ino, fname);
}

void clear_inode(struct inode *inode)
{
	/*
	 * We have to cycle the i_pages lock here because reclaim can be in the
	 * process of removing the last page (in __filemap_remove_folio())
	 * and we must not free the mapping under it.
	 */
	xa_lock_irq(&inode->i_data.i_pages);
	BUG_ON(inode->i_data.nrpages);
	/*
	 * Almost always, mapping_empty(&inode->i_data) here; but there are
	 * two known and long-standing ways in which nodes may get left behind
	 * (when deep radix-tree node allocation failed partway; or when THP
	 * collapse_file() failed). Until those two known cases are cleaned up,
	 * or a cleanup function is called here, do not BUG_ON(!mapping_empty),
	 * nor even WARN_ON(!mapping_empty).
	 */
	xa_unlock_irq(&inode->i_data.i_pages);
	BUG_ON(!list_empty(&inode->i_data.i_private_list));
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(inode->i_state & I_CLEAR);
	BUG_ON(!list_empty(&inode->i_wb_list));
	/* don't need i_lock here, no concurrent mods to i_state */
	inode->i_state = I_FREEING | I_CLEAR;
}
EXPORT_SYMBOL(clear_inode);
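
/*
 * Example (illustrative, not part of this file): a minimal ->evict_inode()
 * must leave the mapping truncated and call clear_inode() exactly once;
 * the "myfs" names are hypothetical:
 *
 *	static void myfs_evict_inode(struct inode *inode)
 *	{
 *		truncate_inode_pages_final(&inode->i_data);
 *		clear_inode(inode);
 *		if (!inode->i_nlink && !is_bad_inode(inode))
 *			myfs_free_on_disk_inode(inode);
 *	}
 */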

/*
 * Free the inode passed in, removing it from the lists it is still connected
 * to. We remove any pages still attached to the inode and wait for any IO that
 * is still in progress before finally destroying the inode.
 *
 * An inode must already be marked I_FREEING so that we avoid the inode being
 * moved back onto lists if we race with other code that manipulates the lists
 * (e.g. writeback_single_inode). The caller is responsible for setting this.
 *
 * An inode must already be removed from the LRU list before being evicted from
 * the cache. This should occur atomically with setting the I_FREEING state
 * flag, so no inodes here should ever be on the LRU when being evicted.
 */
static void evict(struct inode *inode)
{
	const struct super_operations *op = inode->i_sb->s_op;

	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(!list_empty(&inode->i_lru));

	if (!list_empty(&inode->i_io_list))
		inode_io_list_del(inode);

	inode_sb_list_del(inode);

	spin_lock(&inode->i_lock);
	inode_wait_for_lru_isolating(inode);

	/*
	 * Wait for flusher thread to be done with the inode so that filesystem
	 * does not start destroying it while writeback is still running. Since
	 * the inode has I_FREEING set, flusher thread won't start new work on
	 * the inode.  We just have to wait for running writeback to finish.
	 */
	inode_wait_for_writeback(inode);
	spin_unlock(&inode->i_lock);

	if (op->evict_inode) {
		op->evict_inode(inode);
	} else {
		truncate_inode_pages_final(&inode->i_data);
		clear_inode(inode);
	}
	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
		cd_forget(inode);

	remove_inode_hash(inode);

	/*
	 * Wake up waiters in __wait_on_freeing_inode().
	 *
	 * Lockless hash lookup may end up finding the inode before we removed
	 * it above, but only lock it *after* we are done with the wakeup below.
	 * In this case the potential waiter cannot safely block.
	 *
	 * The inode being unhashed after the RCU grace period is what's being
	 * used as an indicator whether blocking on it is safe.
	 */
	spin_lock(&inode->i_lock);
	/*
	 * Pairs with the barrier in prepare_to_wait_event() to make sure
	 * ___wait_var_event() either sees the bit cleared or
	 * waitqueue_active() check in wake_up_var() sees the waiter.
	 */
	smp_mb();
	inode_wake_up_bit(inode, __I_NEW);
	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
	spin_unlock(&inode->i_lock);

	destroy_inode(inode);
}

/*
 * dispose_list - dispose of the contents of a local list
 * @head: the head of the list to free
 *
 * Dispose-list gets a local list with local inodes in it, so it doesn't
 * need to worry about list corruption and SMP locks.
 */
static void dispose_list(struct list_head *head)
{
	while (!list_empty(head)) {
		struct inode *inode;

		inode = list_first_entry(head, struct inode, i_lru);
		list_del_init(&inode->i_lru);

		evict(inode);
		cond_resched();
	}
}

/**
 * evict_inodes	- evict all evictable inodes for a superblock
 * @sb:		superblock to operate on
 *
 * Make sure that no inodes with zero refcount are retained.  This is
 * called by superblock shutdown after having SB_ACTIVE flag removed,
 * so any inode reaching zero refcount during or after that call will
 * be immediately evicted.
 */
void evict_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		if (atomic_read(&inode->i_count))
			continue;

		spin_lock(&inode->i_lock);
		if (atomic_read(&inode->i_count)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);

		/*
		 * We can have a ton of inodes to evict at unmount time given
		 * enough memory, check to see if we need to go to sleep for a
		 * bit so we don't livelock.
		 */
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}
EXPORT_SYMBOL_GPL(evict_inodes);

/**
 * invalidate_inodes	- attempt to free all inodes on a superblock
 * @sb:		superblock to operate on
 *
 * Attempts to free all inodes (including dirty inodes) for a given superblock.
 */
void invalidate_inodes(struct super_block *sb)
{
	struct inode *inode, *next;
	LIST_HEAD(dispose);

again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		if (atomic_read(&inode->i_count)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		inode->i_state |= I_FREEING;
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}

/*
 * Isolate the inode from the LRU in preparation for freeing it.
 *
 * If the inode has the I_REFERENCED flag set, then it means that it has been
 * used recently - the flag is set in iput_final(). When we encounter such an
 * inode, clear the flag and move it to the back of the LRU so it gets another
 * pass through the LRU before it gets reclaimed. This is necessary because of
 * the fact we are doing lazy LRU updates to minimise lock contention so the
 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
 * with this flag set because they are the inodes that are out of order.
 */
static enum lru_status inode_lru_isolate(struct list_head *item,
		struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
	struct list_head *freeable = arg;
	struct inode	*inode = container_of(item, struct inode, i_lru);

	/*
	 * We are inverting the lru lock/inode->i_lock here, so use a
	 * trylock. If we fail to get the lock, just skip it.
	 */
	if (!spin_trylock(&inode->i_lock))
		return LRU_SKIP;

	/*
	 * Inodes can get referenced, redirtied, or repopulated while
	 * they're already on the LRU, and this can make them
	 * unreclaimable for a while. Remove them lazily here; iput,
	 * sync, or the last page cache deletion will requeue them.
	 */
	if (atomic_read(&inode->i_count) ||
	    (inode->i_state & ~I_REFERENCED) ||
	    !mapping_shrinkable(&inode->i_data)) {
		list_lru_isolate(lru, &inode->i_lru);
		spin_unlock(&inode->i_lock);
		this_cpu_dec(nr_unused);
		return LRU_REMOVED;
	}

	/* Recently referenced inodes get one more pass */
	if (inode->i_state & I_REFERENCED) {
		inode->i_state &= ~I_REFERENCED;
		spin_unlock(&inode->i_lock);
		return LRU_ROTATE;
	}

	/*
	 * On highmem systems, mapping_shrinkable() permits dropping
	 * page cache in order to free up struct inodes: lowmem might
	 * be under pressure before the cache inside the highmem zone.
	 */
	if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
		inode_pin_lru_isolating(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(lru_lock);
		if (remove_inode_buffers(inode)) {
			unsigned long reap;
			reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
			if (current_is_kswapd())
				__count_vm_events(KSWAPD_INODESTEAL, reap);
			else
				__count_vm_events(PGINODESTEAL, reap);
			mm_account_reclaimed_pages(reap);
		}
		inode_unpin_lru_isolating(inode);
		spin_lock(lru_lock);
		return LRU_RETRY;
	}

	WARN_ON(inode->i_state & I_NEW);
	inode->i_state |= I_FREEING;
	list_lru_isolate_move(lru, &inode->i_lru, freeable);
	spin_unlock(&inode->i_lock);

	this_cpu_dec(nr_unused);
	return LRU_REMOVED;
}

/*
 * Walk the superblock inode LRU for freeable inodes and attempt to free them.
 * This is called from the superblock shrinker function with a number of inodes
 * to trim from the LRU. Inodes to be freed are moved to a temporary list and
 * then are freed outside inode_lock by dispose_list().
 */
long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
{
	LIST_HEAD(freeable);
	long freed;

	freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
				     inode_lru_isolate, &freeable);
	dispose_list(&freeable);
	return freed;
}
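
/*
 * Note (illustrative, not part of this file): prune_icache_sb() is driven by
 * the per-superblock shrinker, roughly as in super_cache_scan():
 *
 *	freed += prune_icache_sb(sb, sc);
 *
 * with sc->nr_to_scan first apportioned between the dentry and inode LRUs.
 */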

static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked);
/*
 * Called with the inode lock held.
 */
static struct inode *find_inode(struct super_block *sb,
				struct hlist_head *head,
				int (*test)(struct inode *, void *),
				void *data, bool is_inode_hash_locked)
{
	struct inode *inode = NULL;

	if (is_inode_hash_locked)
		lockdep_assert_held(&inode_hash_lock);
	else
		lockdep_assert_not_held(&inode_hash_lock);

	rcu_read_lock();
repeat:
	hlist_for_each_entry_rcu(inode, head, i_hash) {
		if (inode->i_sb != sb)
			continue;
		if (!test(inode, data))
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode, is_inode_hash_locked);
			goto repeat;
		}
		if (unlikely(inode->i_state & I_CREATING)) {
			spin_unlock(&inode->i_lock);
			rcu_read_unlock();
			return ERR_PTR(-ESTALE);
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		rcu_read_unlock();
		return inode;
	}
	rcu_read_unlock();
	return NULL;
}

/*
 * find_inode_fast is the fast path version of find_inode, see the comment at
 * iget_locked for details.
 */
static struct inode *find_inode_fast(struct super_block *sb,
				struct hlist_head *head, unsigned long ino,
				bool is_inode_hash_locked)
{
	struct inode *inode = NULL;

	if (is_inode_hash_locked)
		lockdep_assert_held(&inode_hash_lock);
	else
		lockdep_assert_not_held(&inode_hash_lock);

	rcu_read_lock();
repeat:
	hlist_for_each_entry_rcu(inode, head, i_hash) {
		if (inode->i_ino != ino)
			continue;
		if (inode->i_sb != sb)
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			__wait_on_freeing_inode(inode, is_inode_hash_locked);
			goto repeat;
		}
		if (unlikely(inode->i_state & I_CREATING)) {
			spin_unlock(&inode->i_lock);
			rcu_read_unlock();
			return ERR_PTR(-ESTALE);
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		rcu_read_unlock();
		return inode;
	}
	rcu_read_unlock();
	return NULL;
}

/*
 * Each cpu owns a range of LAST_INO_BATCH numbers.
 * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
 * to renew the exhausted range.
 *
 * This does not significantly increase overflow rate because every CPU can
 * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
 * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
 * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
 * overflow rate by 2x, which does not seem too significant.
 *
 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
 * error if st_ino won't fit in target struct field. Use 32bit counter
 * here to attempt to avoid that.
 */
#define LAST_INO_BATCH 1024
static DEFINE_PER_CPU(unsigned int, last_ino);

unsigned int get_next_ino(void)
{
	unsigned int *p = &get_cpu_var(last_ino);
	unsigned int res = *p;

#ifdef CONFIG_SMP
	if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
		static atomic_t shared_last_ino;
		int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);

		res = next - LAST_INO_BATCH;
	}
#endif

	res++;
	/* get_next_ino should not provide a 0 inode number */
	if (unlikely(!res))
		res++;
	*p = res;
	put_cpu_var(last_ino);
	return res;
}
EXPORT_SYMBOL(get_next_ino);
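
/*
 * Example (illustrative, not part of this file): pseudo filesystems with no
 * stable on-disk inode numbers typically combine this with new_inode():
 *
 *	inode = new_inode(sb);
 *	if (inode)
 *		inode->i_ino = get_next_ino();
 */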

/**
 *	new_inode_pseudo 	- obtain an inode
 *	@sb: superblock
 *
 *	Allocates a new inode for given superblock.
 *	Inode won't be chained in superblock s_inodes list
 *	This means :
 *	- fs can't be unmounted
 *	- quotas, fsnotify, writeback can't work
 */
struct inode *new_inode_pseudo(struct super_block *sb)
{
	return alloc_inode(sb);
}

/**
 *	new_inode 	- obtain an inode
 *	@sb: superblock
 *
 *	Allocates a new inode for given superblock. The default gfp_mask
 *	for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
 *	If HIGHMEM pages are unsuitable or it is known that pages allocated
 *	for the page cache are not reclaimable or migratable,
 *	mapping_set_gfp_mask() must be called with suitable flags on the
 *	newly created inode's mapping
 *
 */
struct inode *new_inode(struct super_block *sb)
{
	struct inode *inode;

	inode = new_inode_pseudo(sb);
	if (inode)
		inode_sb_list_add(inode);
	return inode;
}
EXPORT_SYMBOL(new_inode);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
void lockdep_annotate_inode_mutex_key(struct inode *inode)
{
	if (S_ISDIR(inode->i_mode)) {
		struct file_system_type *type = inode->i_sb->s_type;

		/* Set new key only if filesystem hasn't already changed it */
		if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) {
			/*
			 * ensure nobody is actually holding i_mutex
			 */
			// mutex_destroy(&inode->i_mutex);
			init_rwsem(&inode->i_rwsem);
			lockdep_set_class(&inode->i_rwsem,
					  &type->i_mutex_dir_key);
		}
	}
}
EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
#endif

/**
 * unlock_new_inode - clear the I_NEW state and wake up any waiters
 * @inode:	new inode to unlock
 *
 * Called when the inode is fully initialised to clear the new state of the
 * inode and wake up anyone waiting for the inode to finish initialisation.
 */
void unlock_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW & ~I_CREATING;
	/*
	 * Pairs with the barrier in prepare_to_wait_event() to make sure
	 * ___wait_var_event() either sees the bit cleared or
	 * waitqueue_active() check in wake_up_var() sees the waiter.
	 */
	smp_mb();
	inode_wake_up_bit(inode, __I_NEW);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);

void discard_new_inode(struct inode *inode)
{
	lockdep_annotate_inode_mutex_key(inode);
	spin_lock(&inode->i_lock);
	WARN_ON(!(inode->i_state & I_NEW));
	inode->i_state &= ~I_NEW;
	/*
	 * Pairs with the barrier in prepare_to_wait_event() to make sure
	 * ___wait_var_event() either sees the bit cleared or
	 * waitqueue_active() check in wake_up_var() sees the waiter.
	 */
	smp_mb();
	inode_wake_up_bit(inode, __I_NEW);
	spin_unlock(&inode->i_lock);
	iput(inode);
}
EXPORT_SYMBOL(discard_new_inode);
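
/*
 * Example (illustrative, not part of this file): the I_NEW protocol as seen
 * from a filesystem's iget-style helper; myfs_read_inode() is hypothetical:
 *
 *	inode = iget_locked(sb, ino);
 *	if (!inode)
 *		return ERR_PTR(-ENOMEM);
 *	if (!(inode->i_state & I_NEW))
 *		return inode;			(cache hit, fully set up)
 *	err = myfs_read_inode(inode);
 *	if (err) {
 *		iget_failed(inode);		(marks it bad, unlocks, releases)
 *		return ERR_PTR(err);
 *	}
 *	unlock_new_inode(inode);
 *	return inode;
 */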
1157 */ 969 */ 1158 void unlock_new_inode(struct inode *inode) 970 void unlock_new_inode(struct inode *inode) 1159 { 971 { 1160 lockdep_annotate_inode_mutex_key(inod 972 lockdep_annotate_inode_mutex_key(inode); 1161 spin_lock(&inode->i_lock); 973 spin_lock(&inode->i_lock); 1162 WARN_ON(!(inode->i_state & I_NEW)); 974 WARN_ON(!(inode->i_state & I_NEW)); 1163 inode->i_state &= ~I_NEW & ~I_CREATIN << 1164 /* << 1165 * Pairs with the barrier in prepare_ << 1166 * ___wait_var_event() either sees th << 1167 * waitqueue_active() check in wake_u << 1168 */ << 1169 smp_mb(); << 1170 inode_wake_up_bit(inode, __I_NEW); << 1171 spin_unlock(&inode->i_lock); << 1172 } << 1173 EXPORT_SYMBOL(unlock_new_inode); << 1174 << 1175 void discard_new_inode(struct inode *inode) << 1176 { << 1177 lockdep_annotate_inode_mutex_key(inod << 1178 spin_lock(&inode->i_lock); << 1179 WARN_ON(!(inode->i_state & I_NEW)); << 1180 inode->i_state &= ~I_NEW; 975 inode->i_state &= ~I_NEW; 1181 /* << 1182 * Pairs with the barrier in prepare_ << 1183 * ___wait_var_event() either sees th << 1184 * waitqueue_active() check in wake_u << 1185 */ << 1186 smp_mb(); 976 smp_mb(); 1187 inode_wake_up_bit(inode, __I_NEW); !! 977 wake_up_bit(&inode->i_state, __I_NEW); 1188 spin_unlock(&inode->i_lock); 978 spin_unlock(&inode->i_lock); 1189 iput(inode); << 1190 } 979 } 1191 EXPORT_SYMBOL(discard_new_inode); !! 980 EXPORT_SYMBOL(unlock_new_inode); 1192 << 1193 /** << 1194 * lock_two_nondirectories - take two i_mutex << 1195 * << 1196 * Lock any non-NULL argument. Passed objects << 1197 * Zero, one or two objects may be locked by << 1198 * << 1199 * @inode1: first inode to lock << 1200 * @inode2: second inode to lock << 1201 */ << 1202 void lock_two_nondirectories(struct inode *in << 1203 { << 1204 if (inode1) << 1205 WARN_ON_ONCE(S_ISDIR(inode1-> << 1206 if (inode2) << 1207 WARN_ON_ONCE(S_ISDIR(inode2-> << 1208 if (inode1 > inode2) << 1209 swap(inode1, inode2); << 1210 if (inode1) << 1211 inode_lock(inode1); << 1212 if (inode2 && inode2 != inode1) << 1213 inode_lock_nested(inode2, I_M << 1214 } << 1215 EXPORT_SYMBOL(lock_two_nondirectories); << 1216 << 1217 /** << 1218 * unlock_two_nondirectories - release locks << 1219 * @inode1: first inode to unlock << 1220 * @inode2: second inode to unlock << 1221 */ << 1222 void unlock_two_nondirectories(struct inode * << 1223 { << 1224 if (inode1) { << 1225 WARN_ON_ONCE(S_ISDIR(inode1-> << 1226 inode_unlock(inode1); << 1227 } << 1228 if (inode2 && inode2 != inode1) { << 1229 WARN_ON_ONCE(S_ISDIR(inode2-> << 1230 inode_unlock(inode2); << 1231 } << 1232 } << 1233 EXPORT_SYMBOL(unlock_two_nondirectories); << 1234 << 1235 /** << 1236 * inode_insert5 - obtain an inode from a mou << 1237 * @inode: pre-allocated inode to use fo << 1238 * @hashval: hash value (usually inode num << 1239 * @test: callback used for comparisons << 1240 * @set: callback used to initialize a << 1241 * @data: opaque data pointer to pass t << 1242 * << 1243 * Search for the inode specified by @hashval << 1244 * and if present it is return it with an inc << 1245 * a variant of iget5_locked() for callers th << 1246 * allocation of inode. << 1247 * << 1248 * If the inode is not in cache, insert the p << 1249 * return it locked, hashed, and with the I_N << 1250 * to fill it in before unlocking it via unlo << 1251 * << 1252 * Note both @test and @set are called with t << 1253 * sleep. 
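/*
 * [Editor's example] The consumer side of the I_NEW protocol that
 * unlock_new_inode() above completes, sketched for a hypothetical
 * "foofs". iget_locked(), iget_failed() and unlock_new_inode() are
 * the real helpers; foofs_read_ondisk_inode() stands in for
 * filesystem code.
 */
struct inode *foofs_iget(struct super_block *sb, unsigned long ino)
{
	struct inode *inode;
	int err;

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;		/* cache hit, already initialised */

	err = foofs_read_ondisk_inode(inode);	/* hypothetical */
	if (err) {
		iget_failed(inode);	/* unhash, wake waiters, drop */
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);	/* clear I_NEW and wake waiters */
	return inode;
}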
<< 1254 */ << 1255 struct inode *inode_insert5(struct inode *ino << 1256 int (*test)(struc << 1257 int (*set)(struct << 1258 { << 1259 struct hlist_head *head = inode_hasht << 1260 struct inode *old; << 1261 << 1262 again: << 1263 spin_lock(&inode_hash_lock); << 1264 old = find_inode(inode->i_sb, head, t << 1265 if (unlikely(old)) { << 1266 /* << 1267 * Uhhuh, somebody else creat << 1268 * Use the old inode instead << 1269 */ << 1270 spin_unlock(&inode_hash_lock) << 1271 if (IS_ERR(old)) << 1272 return NULL; << 1273 wait_on_inode(old); << 1274 if (unlikely(inode_unhashed(o << 1275 iput(old); << 1276 goto again; << 1277 } << 1278 return old; << 1279 } << 1280 << 1281 if (set && unlikely(set(inode, data)) << 1282 inode = NULL; << 1283 goto unlock; << 1284 } << 1285 << 1286 /* << 1287 * Return the locked inode with I_NEW << 1288 * caller is responsible for filling << 1289 */ << 1290 spin_lock(&inode->i_lock); << 1291 inode->i_state |= I_NEW; << 1292 hlist_add_head_rcu(&inode->i_hash, he << 1293 spin_unlock(&inode->i_lock); << 1294 << 1295 /* << 1296 * Add inode to the sb list if it's n << 1297 * point, so it should be safe to tes << 1298 */ << 1299 if (list_empty(&inode->i_sb_list)) << 1300 inode_sb_list_add(inode); << 1301 unlock: << 1302 spin_unlock(&inode_hash_lock); << 1303 << 1304 return inode; << 1305 } << 1306 EXPORT_SYMBOL(inode_insert5); << 1307 981 1308 /** 982 /** 1309 * iget5_locked - obtain an inode from a moun 983 * iget5_locked - obtain an inode from a mounted file system 1310 * @sb: super block of file system 984 * @sb: super block of file system 1311 * @hashval: hash value (usually inode num 985 * @hashval: hash value (usually inode number) to get 1312 * @test: callback used for comparisons 986 * @test: callback used for comparisons between inodes 1313 * @set: callback used to initialize a 987 * @set: callback used to initialize a new struct inode 1314 * @data: opaque data pointer to pass t 988 * @data: opaque data pointer to pass to @test and @set 1315 * 989 * 1316 * Search for the inode specified by @hashval 990 * Search for the inode specified by @hashval and @data in the inode cache, 1317 * and if present it is return it with an inc 991 * and if present it is return it with an increased reference count. This is 1318 * a generalized version of iget_locked() for 992 * a generalized version of iget_locked() for file systems where the inode 1319 * number is not sufficient for unique identi 993 * number is not sufficient for unique identification of an inode. 1320 * 994 * 1321 * If the inode is not in cache, allocate a n 995 * If the inode is not in cache, allocate a new inode and return it locked, 1322 * hashed, and with the I_NEW flag set. The f 996 * hashed, and with the I_NEW flag set. The file system gets to fill it in 1323 * before unlocking it via unlock_new_inode() 997 * before unlocking it via unlock_new_inode(). 1324 * 998 * 1325 * Note both @test and @set are called with t 999 * Note both @test and @set are called with the inode_hash_lock held, so can't 1326 * sleep. 1000 * sleep. 
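/*
 * [Editor's example] What the @test/@set callbacks taken by
 * inode_insert5() and iget5_locked() typically look like. Both run
 * under inode_hash_lock, so they must not sleep: no blocking
 * allocations, no mutexes. "foofs" and its embedded-inode layout are
 * hypothetical, but the container_of() idiom is the standard one.
 */
struct foofs_inode {
	u64		object_id;
	struct inode	vfs_inode;
};

static inline struct foofs_inode *FOOFS_I(struct inode *inode)
{
	return container_of(inode, struct foofs_inode, vfs_inode);
}

static int foofs_test(struct inode *inode, void *data)
{
	return FOOFS_I(inode)->object_id == *(u64 *)data;
}

static int foofs_set(struct inode *inode, void *data)
{
	FOOFS_I(inode)->object_id = *(u64 *)data;
	return 0;		/* non-zero aborts the insertion */
}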
1327 */ 1001 */ 1328 struct inode *iget5_locked(struct super_block 1002 struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, 1329 int (*test)(struct inode *, v 1003 int (*test)(struct inode *, void *), 1330 int (*set)(struct inode *, vo 1004 int (*set)(struct inode *, void *), void *data) 1331 { 1005 { 1332 struct inode *inode = ilookup5(sb, ha << 1333 << 1334 if (!inode) { << 1335 struct inode *new = alloc_ino << 1336 << 1337 if (new) { << 1338 inode = inode_insert5 << 1339 if (unlikely(inode != << 1340 destroy_inode << 1341 } << 1342 } << 1343 return inode; << 1344 } << 1345 EXPORT_SYMBOL(iget5_locked); << 1346 << 1347 /** << 1348 * iget5_locked_rcu - obtain an inode from a << 1349 * @sb: super block of file system << 1350 * @hashval: hash value (usually inode num << 1351 * @test: callback used for comparisons << 1352 * @set: callback used to initialize a << 1353 * @data: opaque data pointer to pass t << 1354 * << 1355 * This is equivalent to iget5_locked, except << 1356 * tolerate the inode not being stable, inclu << 1357 */ << 1358 struct inode *iget5_locked_rcu(struct super_b << 1359 int (*test)(struct inode *, v << 1360 int (*set)(struct inode *, vo << 1361 { << 1362 struct hlist_head *head = inode_hasht 1006 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1363 struct inode *inode, *new; !! 1007 struct inode *inode; >> 1008 >> 1009 spin_lock(&inode_hash_lock); >> 1010 inode = find_inode(sb, head, test, data); >> 1011 spin_unlock(&inode_hash_lock); 1364 1012 1365 again: << 1366 inode = find_inode(sb, head, test, da << 1367 if (inode) { 1013 if (inode) { 1368 if (IS_ERR(inode)) << 1369 return NULL; << 1370 wait_on_inode(inode); 1014 wait_on_inode(inode); 1371 if (unlikely(inode_unhashed(i << 1372 iput(inode); << 1373 goto again; << 1374 } << 1375 return inode; 1015 return inode; 1376 } 1016 } 1377 1017 1378 new = alloc_inode(sb); !! 1018 inode = alloc_inode(sb); 1379 if (new) { !! 1019 if (inode) { 1380 inode = inode_insert5(new, ha !! 1020 struct inode *old; 1381 if (unlikely(inode != new)) !! 1021 1382 destroy_inode(new); !! 1022 spin_lock(&inode_hash_lock); >> 1023 /* We released the lock, so.. */ >> 1024 old = find_inode(sb, head, test, data); >> 1025 if (!old) { >> 1026 if (set(inode, data)) >> 1027 goto set_failed; >> 1028 >> 1029 spin_lock(&inode->i_lock); >> 1030 inode->i_state = I_NEW; >> 1031 hlist_add_head(&inode->i_hash, head); >> 1032 spin_unlock(&inode->i_lock); >> 1033 inode_sb_list_add(inode); >> 1034 spin_unlock(&inode_hash_lock); >> 1035 >> 1036 /* Return the locked inode with I_NEW set, the >> 1037 * caller is responsible for filling in the contents >> 1038 */ >> 1039 return inode; >> 1040 } >> 1041 >> 1042 /* >> 1043 * Uhhuh, somebody else created the same inode under >> 1044 * us. Use the old inode instead of the one we just >> 1045 * allocated. >> 1046 */ >> 1047 spin_unlock(&inode_hash_lock); >> 1048 destroy_inode(inode); >> 1049 inode = old; >> 1050 wait_on_inode(inode); 1383 } 1051 } 1384 return inode; 1052 return inode; >> 1053 >> 1054 set_failed: >> 1055 spin_unlock(&inode_hash_lock); >> 1056 destroy_inode(inode); >> 1057 return NULL; 1385 } 1058 } 1386 EXPORT_SYMBOL_GPL(iget5_locked_rcu); !! 
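/*
 * [Editor's example] A hedged sketch of the preallocation pattern
 * inode_insert5() exists for, reusing foofs_test()/foofs_set() from
 * the sketch further up. The inode comes from new_inode() here, so a
 * losing racer can simply be iput(); the same shape with
 * iget5_locked() would hide all of this behind one call.
 */
static struct inode *foofs_get_inode(struct super_block *sb,
				     unsigned long hashval, u64 object_id)
{
	struct inode *inode, *res;

	inode = new_inode(sb);		/* allocate before touching the hash */
	if (!inode)
		return NULL;

	res = inode_insert5(inode, hashval, foofs_test, foofs_set, &object_id);
	if (res != inode)
		iput(inode);		/* lost the race, or insertion failed */
	if (res && (res->i_state & I_NEW)) {
		/* we published it: fill in the contents, then lift I_NEW */
		unlock_new_inode(res);
	}
	return res;
}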
1059 EXPORT_SYMBOL(iget5_locked); 1387 1060 1388 /** 1061 /** 1389 * iget_locked - obtain an inode from a mount 1062 * iget_locked - obtain an inode from a mounted file system 1390 * @sb: super block of file system 1063 * @sb: super block of file system 1391 * @ino: inode number to get 1064 * @ino: inode number to get 1392 * 1065 * 1393 * Search for the inode specified by @ino in 1066 * Search for the inode specified by @ino in the inode cache and if present 1394 * return it with an increased reference coun 1067 * return it with an increased reference count. This is for file systems 1395 * where the inode number is sufficient for u 1068 * where the inode number is sufficient for unique identification of an inode. 1396 * 1069 * 1397 * If the inode is not in cache, allocate a n 1070 * If the inode is not in cache, allocate a new inode and return it locked, 1398 * hashed, and with the I_NEW flag set. The 1071 * hashed, and with the I_NEW flag set. The file system gets to fill it in 1399 * before unlocking it via unlock_new_inode() 1072 * before unlocking it via unlock_new_inode(). 1400 */ 1073 */ 1401 struct inode *iget_locked(struct super_block 1074 struct inode *iget_locked(struct super_block *sb, unsigned long ino) 1402 { 1075 { 1403 struct hlist_head *head = inode_hasht 1076 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1404 struct inode *inode; 1077 struct inode *inode; 1405 again: !! 1078 1406 inode = find_inode_fast(sb, head, ino !! 1079 spin_lock(&inode_hash_lock); >> 1080 inode = find_inode_fast(sb, head, ino); >> 1081 spin_unlock(&inode_hash_lock); 1407 if (inode) { 1082 if (inode) { 1408 if (IS_ERR(inode)) << 1409 return NULL; << 1410 wait_on_inode(inode); 1083 wait_on_inode(inode); 1411 if (unlikely(inode_unhashed(i << 1412 iput(inode); << 1413 goto again; << 1414 } << 1415 return inode; 1084 return inode; 1416 } 1085 } 1417 1086 1418 inode = alloc_inode(sb); 1087 inode = alloc_inode(sb); 1419 if (inode) { 1088 if (inode) { 1420 struct inode *old; 1089 struct inode *old; 1421 1090 1422 spin_lock(&inode_hash_lock); 1091 spin_lock(&inode_hash_lock); 1423 /* We released the lock, so.. 1092 /* We released the lock, so.. */ 1424 old = find_inode_fast(sb, hea !! 1093 old = find_inode_fast(sb, head, ino); 1425 if (!old) { 1094 if (!old) { 1426 inode->i_ino = ino; 1095 inode->i_ino = ino; 1427 spin_lock(&inode->i_l 1096 spin_lock(&inode->i_lock); 1428 inode->i_state = I_NE 1097 inode->i_state = I_NEW; 1429 hlist_add_head_rcu(&i !! 1098 hlist_add_head(&inode->i_hash, head); 1430 spin_unlock(&inode->i 1099 spin_unlock(&inode->i_lock); 1431 inode_sb_list_add(ino 1100 inode_sb_list_add(inode); 1432 spin_unlock(&inode_ha 1101 spin_unlock(&inode_hash_lock); 1433 1102 1434 /* Return the locked 1103 /* Return the locked inode with I_NEW set, the 1435 * caller is responsi 1104 * caller is responsible for filling in the contents 1436 */ 1105 */ 1437 return inode; 1106 return inode; 1438 } 1107 } 1439 1108 1440 /* 1109 /* 1441 * Uhhuh, somebody else creat 1110 * Uhhuh, somebody else created the same inode under 1442 * us. Use the old inode inst 1111 * us. Use the old inode instead of the one we just 1443 * allocated. 1112 * allocated. 
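/*
 * [Editor's example] The shape of iget_locked() above, reduced to a
 * self-contained user-space "lookup or insert" skeleton: search,
 * allocate with no locks held, then re-search under the lock before
 * inserting, because another thread may have won the race meanwhile.
 * Hypothetical names; one bucket and no refcounting, for brevity.
 */
#include <pthread.h>
#include <stdlib.h>

struct node { unsigned long key; struct node *next; };

static struct node *bucket;
static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;

static struct node *bucket_find(unsigned long key)
{
	struct node *n;

	for (n = bucket; n; n = n->next)
		if (n->key == key)
			return n;
	return NULL;
}

static struct node *lookup_or_insert(unsigned long key)
{
	struct node *n, *old;

	pthread_mutex_lock(&bucket_lock);
	old = bucket_find(key);
	pthread_mutex_unlock(&bucket_lock);
	if (old)
		return old;

	n = malloc(sizeof(*n));		/* allocate with no locks held */
	if (!n)
		return NULL;
	n->key = key;

	pthread_mutex_lock(&bucket_lock);
	old = bucket_find(key);		/* "We released the lock, so.." */
	if (!old) {
		n->next = bucket;	/* hlist_add_head() equivalent */
		bucket = n;
	}
	pthread_mutex_unlock(&bucket_lock);

	if (old) {			/* somebody else created it under us */
		free(n);		/* destroy_inode() equivalent */
		return old;
	}
	return n;
}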
1444 */ 1113 */ 1445 spin_unlock(&inode_hash_lock) 1114 spin_unlock(&inode_hash_lock); 1446 destroy_inode(inode); 1115 destroy_inode(inode); 1447 if (IS_ERR(old)) << 1448 return NULL; << 1449 inode = old; 1116 inode = old; 1450 wait_on_inode(inode); 1117 wait_on_inode(inode); 1451 if (unlikely(inode_unhashed(i << 1452 iput(inode); << 1453 goto again; << 1454 } << 1455 } 1118 } 1456 return inode; 1119 return inode; 1457 } 1120 } 1458 EXPORT_SYMBOL(iget_locked); 1121 EXPORT_SYMBOL(iget_locked); 1459 1122 1460 /* 1123 /* 1461 * search the inode cache for a matching inod 1124 * search the inode cache for a matching inode number. 1462 * If we find one, then the inode number we a 1125 * If we find one, then the inode number we are trying to 1463 * allocate is not unique and so we should no 1126 * allocate is not unique and so we should not use it. 1464 * 1127 * 1465 * Returns 1 if the inode number is unique, 0 1128 * Returns 1 if the inode number is unique, 0 if it is not. 1466 */ 1129 */ 1467 static int test_inode_iunique(struct super_bl 1130 static int test_inode_iunique(struct super_block *sb, unsigned long ino) 1468 { 1131 { 1469 struct hlist_head *b = inode_hashtabl 1132 struct hlist_head *b = inode_hashtable + hash(sb, ino); 1470 struct inode *inode; 1133 struct inode *inode; 1471 1134 1472 hlist_for_each_entry_rcu(inode, b, i_ !! 1135 spin_lock(&inode_hash_lock); 1473 if (inode->i_ino == ino && in !! 1136 hlist_for_each_entry(inode, b, i_hash) { >> 1137 if (inode->i_ino == ino && inode->i_sb == sb) { >> 1138 spin_unlock(&inode_hash_lock); 1474 return 0; 1139 return 0; >> 1140 } 1475 } 1141 } >> 1142 spin_unlock(&inode_hash_lock); >> 1143 1476 return 1; 1144 return 1; 1477 } 1145 } 1478 1146 1479 /** 1147 /** 1480 * iunique - get a unique inode number 1148 * iunique - get a unique inode number 1481 * @sb: superblock 1149 * @sb: superblock 1482 * @max_reserved: highest reserved inode 1150 * @max_reserved: highest reserved inode number 1483 * 1151 * 1484 * Obtain an inode number that is unique 1152 * Obtain an inode number that is unique on the system for a given 1485 * superblock. This is used by file syst 1153 * superblock. This is used by file systems that have no natural 1486 * permanent inode numbering system. An 1154 * permanent inode numbering system. An inode number is returned that 1487 * is higher than the reserved limit but 1155 * is higher than the reserved limit but unique. 1488 * 1156 * 1489 * BUGS: 1157 * BUGS: 1490 * With a large number of inodes live on 1158 * With a large number of inodes live on the file system this function 1491 * currently becomes quite slow. 1159 * currently becomes quite slow. 1492 */ 1160 */ 1493 ino_t iunique(struct super_block *sb, ino_t m 1161 ino_t iunique(struct super_block *sb, ino_t max_reserved) 1494 { 1162 { 1495 /* 1163 /* 1496 * On a 32bit, non LFS stat() call, g 1164 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW 1497 * error if st_ino won't fit in targe 1165 * error if st_ino won't fit in target struct field. Use 32bit counter 1498 * here to attempt to avoid that. 1166 * here to attempt to avoid that. 
1499 */ 1167 */ 1500 static DEFINE_SPINLOCK(iunique_lock); 1168 static DEFINE_SPINLOCK(iunique_lock); 1501 static unsigned int counter; 1169 static unsigned int counter; 1502 ino_t res; 1170 ino_t res; 1503 1171 1504 rcu_read_lock(); << 1505 spin_lock(&iunique_lock); 1172 spin_lock(&iunique_lock); 1506 do { 1173 do { 1507 if (counter <= max_reserved) 1174 if (counter <= max_reserved) 1508 counter = max_reserve 1175 counter = max_reserved + 1; 1509 res = counter++; 1176 res = counter++; 1510 } while (!test_inode_iunique(sb, res) 1177 } while (!test_inode_iunique(sb, res)); 1511 spin_unlock(&iunique_lock); 1178 spin_unlock(&iunique_lock); 1512 rcu_read_unlock(); << 1513 1179 1514 return res; 1180 return res; 1515 } 1181 } 1516 EXPORT_SYMBOL(iunique); 1182 EXPORT_SYMBOL(iunique); 1517 1183 1518 struct inode *igrab(struct inode *inode) 1184 struct inode *igrab(struct inode *inode) 1519 { 1185 { 1520 spin_lock(&inode->i_lock); 1186 spin_lock(&inode->i_lock); 1521 if (!(inode->i_state & (I_FREEING|I_W 1187 if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { 1522 __iget(inode); 1188 __iget(inode); 1523 spin_unlock(&inode->i_lock); 1189 spin_unlock(&inode->i_lock); 1524 } else { 1190 } else { 1525 spin_unlock(&inode->i_lock); 1191 spin_unlock(&inode->i_lock); 1526 /* 1192 /* 1527 * Handle the case where s_op 1193 * Handle the case where s_op->clear_inode is not been 1528 * called yet, and somebody i 1194 * called yet, and somebody is calling igrab 1529 * while the inode is getting 1195 * while the inode is getting freed. 1530 */ 1196 */ 1531 inode = NULL; 1197 inode = NULL; 1532 } 1198 } 1533 return inode; 1199 return inode; 1534 } 1200 } 1535 EXPORT_SYMBOL(igrab); 1201 EXPORT_SYMBOL(igrab); 1536 1202 1537 /** 1203 /** 1538 * ilookup5_nowait - search for an inode in t 1204 * ilookup5_nowait - search for an inode in the inode cache 1539 * @sb: super block of file system to 1205 * @sb: super block of file system to search 1540 * @hashval: hash value (usually inode num 1206 * @hashval: hash value (usually inode number) to search for 1541 * @test: callback used for comparisons 1207 * @test: callback used for comparisons between inodes 1542 * @data: opaque data pointer to pass t 1208 * @data: opaque data pointer to pass to @test 1543 * 1209 * 1544 * Search for the inode specified by @hashval 1210 * Search for the inode specified by @hashval and @data in the inode cache. 1545 * If the inode is in the cache, the inode is 1211 * If the inode is in the cache, the inode is returned with an incremented 1546 * reference count. 1212 * reference count. 1547 * 1213 * 1548 * Note: I_NEW is not waited upon so you have 1214 * Note: I_NEW is not waited upon so you have to be very careful what you do 1549 * with the returned inode. You probably sho 1215 * with the returned inode. You probably should be using ilookup5() instead. 1550 * 1216 * 1551 * Note2: @test is called with the inode_hash 1217 * Note2: @test is called with the inode_hash_lock held, so can't sleep. 1552 */ 1218 */ 1553 struct inode *ilookup5_nowait(struct super_bl 1219 struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, 1554 int (*test)(struct inode *, v 1220 int (*test)(struct inode *, void *), void *data) 1555 { 1221 { 1556 struct hlist_head *head = inode_hasht 1222 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1557 struct inode *inode; 1223 struct inode *inode; 1558 1224 1559 spin_lock(&inode_hash_lock); 1225 spin_lock(&inode_hash_lock); 1560 inode = find_inode(sb, head, test, da !! 
1226 inode = find_inode(sb, head, test, data);
1561 spin_unlock(&inode_hash_lock); 1227 spin_unlock(&inode_hash_lock);
1562 1228
1563 return IS_ERR(inode) ? NULL : inode; !! 1229 return inode;
1564 } 1230 }
1565 EXPORT_SYMBOL(ilookup5_nowait); 1231 EXPORT_SYMBOL(ilookup5_nowait);
1566 1232
1567 /** 1233 /**
1568 * ilookup5 - search for an inode in the inode cache 1234 * ilookup5 - search for an inode in the inode cache
1569 * @sb: super block of file system to search 1235 * @sb: super block of file system to search
1570 * @hashval: hash value (usually inode number) to search for 1236 * @hashval: hash value (usually inode number) to search for
1571 * @test: callback used for comparisons between inodes 1237 * @test: callback used for comparisons between inodes
1572 * @data: opaque data pointer to pass to @test 1238 * @data: opaque data pointer to pass to @test
1573 * 1239 *
1574 * Search for the inode specified by @hashval and @data in the inode cache, 1240 * Search for the inode specified by @hashval and @data in the inode cache,
1575 * and if the inode is in the cache, return the inode with an incremented 1241 * and if the inode is in the cache, return the inode with an incremented
1576 * reference count. Waits on I_NEW before returning the inode. 1242 * reference count. Waits on I_NEW before returning the inode.
1578 * 1244 *
1579 * This is a generalized version of ilookup() for file systems where the 1245 * This is a generalized version of ilookup() for file systems where the
1580 * inode number is not sufficient for unique identification of an inode. 1246 * inode number is not sufficient for unique identification of an inode.
1581 * 1247 *
1582 * Note: @test is called with the inode_hash_lock held, so can't sleep. 1248 * Note: @test is called with the inode_hash_lock held, so can't sleep.
1583 */ 1249 */
1584 struct inode *ilookup5(struct super_block *sb, unsigned long hashval, 1250 struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
1585 int (*test)(struct inode *, void *), void *data) 1251 int (*test)(struct inode *, void *), void *data)
1586 { 1252 {
1587 struct inode *inode; !! 1253 struct inode *inode = ilookup5_nowait(sb, hashval, test, data);
1588 again: !! 1254
1589 inode = ilookup5_nowait(sb, hashval, test, data); !! 1255 if (inode)
1590 if (inode) { <<
1591 wait_on_inode(inode); 1256 wait_on_inode(inode);
1592 if (unlikely(inode_unhashed(inode))) { <<
1593 iput(inode); <<
1594 goto again; <<
1595 } <<
1596 } <<
1597 return inode; 1257 return inode;
1598 } 1258 }
1599 EXPORT_SYMBOL(ilookup5); 1259 EXPORT_SYMBOL(ilookup5);
1600 1260
1601 /** 1261 /**
1602 * ilookup - search for an inode in the inode cache 1262 * ilookup - search for an inode in the inode cache
1603 * @sb: super block of file system to search 1263 * @sb: super block of file system to search
1604 * @ino: inode number to search for 1264 * @ino: inode number to search for
1605 * 1265 *
1606 * Search for the inode @ino in the inode cache, and if the inode is in the 1266 * Search for the inode @ino in the inode cache, and if the inode is in the
1607 * cache, the inode is returned with an incremented reference count. 1267 * cache, the inode is returned with an incremented reference count.
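/*
 * [Editor's example] ilookup()/ilookup5() are cache-only probes that
 * never allocate; a typical use is reacting to an external change
 * notification only if the inode is actually cached. "foofs" is
 * hypothetical; invalidate_remote_inode() is a real helper, though
 * whether it fits depends on the filesystem.
 */
static void foofs_handle_remote_change(struct super_block *sb,
				       unsigned long ino)
{
	struct inode *inode = ilookup(sb, ino);

	if (!inode)
		return;			/* not cached: nothing to do */
	invalidate_remote_inode(inode);
	iput(inode);			/* drop the reference ilookup() took */
}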
1608 */ 1268 */ 1609 struct inode *ilookup(struct super_block *sb, 1269 struct inode *ilookup(struct super_block *sb, unsigned long ino) 1610 { 1270 { 1611 struct hlist_head *head = inode_hasht 1271 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1612 struct inode *inode; 1272 struct inode *inode; 1613 again: << 1614 inode = find_inode_fast(sb, head, ino << 1615 << 1616 if (inode) { << 1617 if (IS_ERR(inode)) << 1618 return NULL; << 1619 wait_on_inode(inode); << 1620 if (unlikely(inode_unhashed(i << 1621 iput(inode); << 1622 goto again; << 1623 } << 1624 } << 1625 return inode; << 1626 } << 1627 EXPORT_SYMBOL(ilookup); << 1628 << 1629 /** << 1630 * find_inode_nowait - find an inode in the i << 1631 * @sb: super block of file system to << 1632 * @hashval: hash value (usually inode num << 1633 * @match: callback used for comparisons << 1634 * @data: opaque data pointer to pass t << 1635 * << 1636 * Search for the inode specified by @hashval << 1637 * cache, where the helper function @match wi << 1638 * does not match, 1 if the inode does match, << 1639 * should be stopped. The @match function mu << 1640 * taking the i_lock spin_lock and checking i << 1641 * freed or being initialized, and incrementi << 1642 * before returning 1. It also must not slee << 1643 * the inode_hash_lock spinlock held. << 1644 * << 1645 * This is a even more generalized version of << 1646 * function must never block --- find_inode() << 1647 * __wait_on_freeing_inode() --- or when the << 1648 * the reference count because the resulting << 1649 * inode eviction. The tradeoff is that the << 1650 * very carefully implemented. << 1651 */ << 1652 struct inode *find_inode_nowait(struct super_ << 1653 unsigned long << 1654 int (*match)( << 1655 << 1656 void *data) << 1657 { << 1658 struct hlist_head *head = inode_hasht << 1659 struct inode *inode, *ret_inode = NUL << 1660 int mval; << 1661 1273 1662 spin_lock(&inode_hash_lock); 1274 spin_lock(&inode_hash_lock); 1663 hlist_for_each_entry(inode, head, i_h !! 1275 inode = find_inode_fast(sb, head, ino); 1664 if (inode->i_sb != sb) << 1665 continue; << 1666 mval = match(inode, hashval, << 1667 if (mval == 0) << 1668 continue; << 1669 if (mval == 1) << 1670 ret_inode = inode; << 1671 goto out; << 1672 } << 1673 out: << 1674 spin_unlock(&inode_hash_lock); 1276 spin_unlock(&inode_hash_lock); 1675 return ret_inode; << 1676 } << 1677 EXPORT_SYMBOL(find_inode_nowait); << 1678 << 1679 /** << 1680 * find_inode_rcu - find an inode in the inod << 1681 * @sb: Super block of file system to << 1682 * @hashval: Key to hash << 1683 * @test: Function to test match on an << 1684 * @data: Data for test function << 1685 * << 1686 * Search for the inode specified by @hashval << 1687 * where the helper function @test will retur << 1688 * and 1 if it does. The @test function must << 1689 * i_lock spin_lock and checking i_state for << 1690 * initialized. << 1691 * << 1692 * If successful, this will return the inode << 1693 * returned 1 and NULL otherwise. << 1694 * << 1695 * The @test function is not permitted to tak << 1696 * It is also not permitted to sleep. << 1697 * << 1698 * The caller must hold the RCU read lock. 
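/*
 * [Editor's example] A @match callback obeying the find_inode_nowait()
 * contract spelled out above: it runs under inode_hash_lock, must not
 * sleep, takes i_lock itself, and grabs the reference before returning
 * 1. __iget() is VFS-internal, so a real caller would be core fs code;
 * "foofs" and FOOFS_I() are the hypothetical names from earlier
 * sketches.
 */
static int foofs_match(struct inode *inode, unsigned long hashval,
		       void *data)
{
	u64 *object_id = data;

	if (FOOFS_I(inode)->object_id != *object_id)
		return 0;			/* no match: keep scanning */

	spin_lock(&inode->i_lock);
	if (inode->i_state & (I_FREEING | I_WILL_FREE)) {
		spin_unlock(&inode->i_lock);
		return -1;			/* dying: stop the search */
	}
	__iget(inode);				/* take the ref under i_lock */
	spin_unlock(&inode->i_lock);
	return 1;				/* match: stop and return it */
}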
<< 1699 */ << 1700 struct inode *find_inode_rcu(struct super_blo << 1701 int (*test)(stru << 1702 { << 1703 struct hlist_head *head = inode_hasht << 1704 struct inode *inode; << 1705 << 1706 RCU_LOCKDEP_WARN(!rcu_read_lock_held( << 1707 "suspicious find_ino << 1708 << 1709 hlist_for_each_entry_rcu(inode, head, << 1710 if (inode->i_sb == sb && << 1711 !(READ_ONCE(inode->i_stat << 1712 test(inode, data)) << 1713 return inode; << 1714 } << 1715 return NULL; << 1716 } << 1717 EXPORT_SYMBOL(find_inode_rcu); << 1718 << 1719 /** << 1720 * find_inode_by_ino_rcu - Find an inode in t << 1721 * @sb: Super block of file system to << 1722 * @ino: The inode number to match << 1723 * << 1724 * Search for the inode specified by @hashval << 1725 * where the helper function @test will retur << 1726 * and 1 if it does. The @test function must << 1727 * i_lock spin_lock and checking i_state for << 1728 * initialized. << 1729 * << 1730 * If successful, this will return the inode << 1731 * returned 1 and NULL otherwise. << 1732 * << 1733 * The @test function is not permitted to tak << 1734 * It is also not permitted to sleep. << 1735 * << 1736 * The caller must hold the RCU read lock. << 1737 */ << 1738 struct inode *find_inode_by_ino_rcu(struct su << 1739 unsigned << 1740 { << 1741 struct hlist_head *head = inode_hasht << 1742 struct inode *inode; << 1743 1277 1744 RCU_LOCKDEP_WARN(!rcu_read_lock_held( !! 1278 if (inode) 1745 "suspicious find_ino !! 1279 wait_on_inode(inode); 1746 !! 1280 return inode; 1747 hlist_for_each_entry_rcu(inode, head, << 1748 if (inode->i_ino == ino && << 1749 inode->i_sb == sb && << 1750 !(READ_ONCE(inode->i_stat << 1751 return inode; << 1752 } << 1753 return NULL; << 1754 } 1281 } 1755 EXPORT_SYMBOL(find_inode_by_ino_rcu); !! 1282 EXPORT_SYMBOL(ilookup); 1756 1283 1757 int insert_inode_locked(struct inode *inode) 1284 int insert_inode_locked(struct inode *inode) 1758 { 1285 { 1759 struct super_block *sb = inode->i_sb; 1286 struct super_block *sb = inode->i_sb; 1760 ino_t ino = inode->i_ino; 1287 ino_t ino = inode->i_ino; 1761 struct hlist_head *head = inode_hasht 1288 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1762 1289 1763 while (1) { 1290 while (1) { 1764 struct inode *old = NULL; 1291 struct inode *old = NULL; 1765 spin_lock(&inode_hash_lock); 1292 spin_lock(&inode_hash_lock); 1766 hlist_for_each_entry(old, hea 1293 hlist_for_each_entry(old, head, i_hash) { 1767 if (old->i_ino != ino 1294 if (old->i_ino != ino) 1768 continue; 1295 continue; 1769 if (old->i_sb != sb) 1296 if (old->i_sb != sb) 1770 continue; 1297 continue; 1771 spin_lock(&old->i_loc 1298 spin_lock(&old->i_lock); 1772 if (old->i_state & (I 1299 if (old->i_state & (I_FREEING|I_WILL_FREE)) { 1773 spin_unlock(& 1300 spin_unlock(&old->i_lock); 1774 continue; 1301 continue; 1775 } 1302 } 1776 break; 1303 break; 1777 } 1304 } 1778 if (likely(!old)) { 1305 if (likely(!old)) { 1779 spin_lock(&inode->i_l 1306 spin_lock(&inode->i_lock); 1780 inode->i_state |= I_N !! 1307 inode->i_state |= I_NEW; 1781 hlist_add_head_rcu(&i !! 
1308 hlist_add_head(&inode->i_hash, head); 1782 spin_unlock(&inode->i 1309 spin_unlock(&inode->i_lock); 1783 spin_unlock(&inode_ha 1310 spin_unlock(&inode_hash_lock); 1784 return 0; 1311 return 0; 1785 } 1312 } 1786 if (unlikely(old->i_state & I << 1787 spin_unlock(&old->i_l << 1788 spin_unlock(&inode_ha << 1789 return -EBUSY; << 1790 } << 1791 __iget(old); 1313 __iget(old); 1792 spin_unlock(&old->i_lock); 1314 spin_unlock(&old->i_lock); 1793 spin_unlock(&inode_hash_lock) 1315 spin_unlock(&inode_hash_lock); 1794 wait_on_inode(old); 1316 wait_on_inode(old); 1795 if (unlikely(!inode_unhashed( 1317 if (unlikely(!inode_unhashed(old))) { 1796 iput(old); 1318 iput(old); 1797 return -EBUSY; 1319 return -EBUSY; 1798 } 1320 } 1799 iput(old); 1321 iput(old); 1800 } 1322 } 1801 } 1323 } 1802 EXPORT_SYMBOL(insert_inode_locked); 1324 EXPORT_SYMBOL(insert_inode_locked); 1803 1325 1804 int insert_inode_locked4(struct inode *inode, 1326 int insert_inode_locked4(struct inode *inode, unsigned long hashval, 1805 int (*test)(struct inode *, v 1327 int (*test)(struct inode *, void *), void *data) 1806 { 1328 { 1807 struct inode *old; !! 1329 struct super_block *sb = inode->i_sb; >> 1330 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1808 1331 1809 inode->i_state |= I_CREATING; !! 1332 while (1) { 1810 old = inode_insert5(inode, hashval, t !! 1333 struct inode *old = NULL; 1811 1334 1812 if (old != inode) { !! 1335 spin_lock(&inode_hash_lock); >> 1336 hlist_for_each_entry(old, head, i_hash) { >> 1337 if (old->i_sb != sb) >> 1338 continue; >> 1339 if (!test(old, data)) >> 1340 continue; >> 1341 spin_lock(&old->i_lock); >> 1342 if (old->i_state & (I_FREEING|I_WILL_FREE)) { >> 1343 spin_unlock(&old->i_lock); >> 1344 continue; >> 1345 } >> 1346 break; >> 1347 } >> 1348 if (likely(!old)) { >> 1349 spin_lock(&inode->i_lock); >> 1350 inode->i_state |= I_NEW; >> 1351 hlist_add_head(&inode->i_hash, head); >> 1352 spin_unlock(&inode->i_lock); >> 1353 spin_unlock(&inode_hash_lock); >> 1354 return 0; >> 1355 } >> 1356 __iget(old); >> 1357 spin_unlock(&old->i_lock); >> 1358 spin_unlock(&inode_hash_lock); >> 1359 wait_on_inode(old); >> 1360 if (unlikely(!inode_unhashed(old))) { >> 1361 iput(old); >> 1362 return -EBUSY; >> 1363 } 1813 iput(old); 1364 iput(old); 1814 return -EBUSY; << 1815 } 1365 } 1816 return 0; << 1817 } 1366 } 1818 EXPORT_SYMBOL(insert_inode_locked4); 1367 EXPORT_SYMBOL(insert_inode_locked4); 1819 1368 1820 1369 1821 int generic_delete_inode(struct inode *inode) 1370 int generic_delete_inode(struct inode *inode) 1822 { 1371 { 1823 return 1; 1372 return 1; 1824 } 1373 } 1825 EXPORT_SYMBOL(generic_delete_inode); 1374 EXPORT_SYMBOL(generic_delete_inode); 1826 1375 1827 /* 1376 /* 1828 * Called when we're dropping the last refere 1377 * Called when we're dropping the last reference 1829 * to an inode. 1378 * to an inode. 1830 * 1379 * 1831 * Call the FS "drop_inode()" function, defau 1380 * Call the FS "drop_inode()" function, defaulting to 1832 * the legacy UNIX filesystem behaviour. If 1381 * the legacy UNIX filesystem behaviour. If it tells 1833 * us to evict inode, do so. Otherwise, reta 1382 * us to evict inode, do so. Otherwise, retain inode 1834 * in cache if fs is alive, sync and evict if 1383 * in cache if fs is alive, sync and evict if fs is 1835 * shutting down. 1384 * shutting down. 
1836 */ 1385 */ 1837 static void iput_final(struct inode *inode) 1386 static void iput_final(struct inode *inode) 1838 { 1387 { 1839 struct super_block *sb = inode->i_sb; 1388 struct super_block *sb = inode->i_sb; 1840 const struct super_operations *op = i 1389 const struct super_operations *op = inode->i_sb->s_op; 1841 unsigned long state; << 1842 int drop; 1390 int drop; 1843 1391 1844 WARN_ON(inode->i_state & I_NEW); 1392 WARN_ON(inode->i_state & I_NEW); 1845 1393 1846 if (op->drop_inode) 1394 if (op->drop_inode) 1847 drop = op->drop_inode(inode); 1395 drop = op->drop_inode(inode); 1848 else 1396 else 1849 drop = generic_drop_inode(ino 1397 drop = generic_drop_inode(inode); 1850 1398 1851 if (!drop && !! 1399 if (!drop && (sb->s_flags & MS_ACTIVE)) { 1852 !(inode->i_state & I_DONTCACHE) & !! 1400 inode->i_state |= I_REFERENCED; 1853 (sb->s_flags & SB_ACTIVE)) { !! 1401 inode_add_lru(inode); 1854 __inode_add_lru(inode, true); << 1855 spin_unlock(&inode->i_lock); 1402 spin_unlock(&inode->i_lock); 1856 return; 1403 return; 1857 } 1404 } 1858 1405 1859 state = inode->i_state; << 1860 if (!drop) { 1406 if (!drop) { 1861 WRITE_ONCE(inode->i_state, st !! 1407 inode->i_state |= I_WILL_FREE; 1862 spin_unlock(&inode->i_lock); 1408 spin_unlock(&inode->i_lock); 1863 << 1864 write_inode_now(inode, 1); 1409 write_inode_now(inode, 1); 1865 << 1866 spin_lock(&inode->i_lock); 1410 spin_lock(&inode->i_lock); 1867 state = inode->i_state; !! 1411 WARN_ON(inode->i_state & I_NEW); 1868 WARN_ON(state & I_NEW); !! 1412 inode->i_state &= ~I_WILL_FREE; 1869 state &= ~I_WILL_FREE; << 1870 } 1413 } 1871 1414 1872 WRITE_ONCE(inode->i_state, state | I_ !! 1415 inode->i_state |= I_FREEING; 1873 if (!list_empty(&inode->i_lru)) 1416 if (!list_empty(&inode->i_lru)) 1874 inode_lru_list_del(inode); 1417 inode_lru_list_del(inode); 1875 spin_unlock(&inode->i_lock); 1418 spin_unlock(&inode->i_lock); 1876 1419 1877 evict(inode); 1420 evict(inode); 1878 } 1421 } 1879 1422 1880 /** 1423 /** 1881 * iput - put an inode 1424 * iput - put an inode 1882 * @inode: inode to put 1425 * @inode: inode to put 1883 * 1426 * 1884 * Puts an inode, dropping its usage cou 1427 * Puts an inode, dropping its usage count. If the inode use count hits 1885 * zero, the inode is then freed and may 1428 * zero, the inode is then freed and may also be destroyed. 1886 * 1429 * 1887 * Consequently, iput() can sleep. 1430 * Consequently, iput() can sleep. 1888 */ 1431 */ 1889 void iput(struct inode *inode) 1432 void iput(struct inode *inode) 1890 { 1433 { 1891 if (!inode) !! 1434 if (inode) { 1892 return; !! 1435 BUG_ON(inode->i_state & I_CLEAR); 1893 BUG_ON(inode->i_state & I_CLEAR); !! 1436 1894 retry: !! 1437 if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) 1895 if (atomic_dec_and_lock(&inode->i_cou !! 1438 iput_final(inode); 1896 if (inode->i_nlink && (inode- << 1897 atomic_inc(&inode->i_ << 1898 spin_unlock(&inode->i << 1899 trace_writeback_lazyt << 1900 mark_inode_dirty_sync << 1901 goto retry; << 1902 } << 1903 iput_final(inode); << 1904 } 1439 } 1905 } 1440 } 1906 EXPORT_SYMBOL(iput); 1441 EXPORT_SYMBOL(iput); 1907 1442 1908 #ifdef CONFIG_BLOCK << 1909 /** 1443 /** 1910 * bmap - find a block number in a fi 1444 * bmap - find a block number in a file 1911 * @inode: inode owning the block numbe !! 1445 * @inode: inode of file 1912 * @block: pointer containing the block !! 1446 * @block: block to find 1913 * 1447 * 1914 * Replaces the value in ``*block`` with !! 
1448 * Returns the block number on the device holding the inode that 1915 * corresponding to the requested block !! 1449 * is the disk block number for the block of the file requested. 1916 * That is, asked for block 4 of inode 1 !! 1450 * That is, asked for block 4 of inode 1 the function will return the 1917 * 4 in ``*block``, with disk block rela !! 1451 * disk block relative to the disk start that holds that block of the 1918 * block of the file. !! 1452 * file. 1919 * !! 1453 */ 1920 * Returns -EINVAL in case of error, 0 o !! 1454 sector_t bmap(struct inode *inode, sector_t block) 1921 * hole, returns 0 and ``*block`` is als !! 1455 { 1922 */ !! 1456 sector_t res = 0; 1923 int bmap(struct inode *inode, sector_t *block !! 1457 if (inode->i_mapping->a_ops->bmap) 1924 { !! 1458 res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block); 1925 if (!inode->i_mapping->a_ops->bmap) !! 1459 return res; 1926 return -EINVAL; << 1927 << 1928 *block = inode->i_mapping->a_ops->bma << 1929 return 0; << 1930 } 1460 } 1931 EXPORT_SYMBOL(bmap); 1461 EXPORT_SYMBOL(bmap); 1932 #endif << 1933 1462 1934 /* 1463 /* 1935 * With relative atime, only update atime if 1464 * With relative atime, only update atime if the previous atime is 1936 * earlier than or equal to either the ctime !! 1465 * earlier than either the ctime or mtime or if at least a day has 1937 * or if at least a day has passed since the !! 1466 * passed since the last atime update. 1938 */ 1467 */ 1939 static bool relatime_need_update(struct vfsmo !! 1468 static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, 1940 struct timespec6 !! 1469 struct timespec now) 1941 { 1470 { 1942 struct timespec64 atime, mtime, ctime << 1943 1471 1944 if (!(mnt->mnt_flags & MNT_RELATIME)) 1472 if (!(mnt->mnt_flags & MNT_RELATIME)) 1945 return true; !! 1473 return 1; 1946 /* 1474 /* 1947 * Is mtime younger than or equal to !! 1475 * Is mtime younger than atime? If yes, update atime: 1948 */ 1476 */ 1949 atime = inode_get_atime(inode); !! 1477 if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0) 1950 mtime = inode_get_mtime(inode); !! 1478 return 1; 1951 if (timespec64_compare(&mtime, &atime << 1952 return true; << 1953 /* 1479 /* 1954 * Is ctime younger than or equal to !! 1480 * Is ctime younger than atime? If yes, update atime: 1955 */ 1481 */ 1956 ctime = inode_get_ctime(inode); !! 1482 if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0) 1957 if (timespec64_compare(&ctime, &atime !! 1483 return 1; 1958 return true; << 1959 1484 1960 /* 1485 /* 1961 * Is the previous atime value older 1486 * Is the previous atime value older than a day? If yes, 1962 * update atime: 1487 * update atime: 1963 */ 1488 */ 1964 if ((long)(now.tv_sec - atime.tv_sec) !! 1489 if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60) 1965 return true; !! 1490 return 1; 1966 /* 1491 /* 1967 * Good, we can skip the atime update 1492 * Good, we can skip the atime update: 1968 */ 1493 */ 1969 return false; !! 1494 return 0; 1970 } << 1971 << 1972 /** << 1973 * inode_update_timestamps - update the times << 1974 * @inode: inode to be updated << 1975 * @flags: S_* flags that needed to be update << 1976 * << 1977 * The update_time function is called when an << 1978 * updated for a read or write operation. Thi << 1979 * actual timestamps. It's up to the caller t << 1980 * dirty appropriately. << 1981 * << 1982 * In the case where any of S_MTIME, S_CTIME, << 1983 * attempt to update all three of them. S_ATI << 1984 * independently of the rest. 
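/*
 * [Editor's example] The relatime rule above, restated as a
 * self-contained user-space predicate: update atime only when it is
 * not newer than mtime or ctime, or is at least a day stale.
 * timespec_cmp() is a local helper, not a kernel API.
 */
#include <time.h>

static int timespec_cmp(const struct timespec *a, const struct timespec *b)
{
	if (a->tv_sec != b->tv_sec)
		return a->tv_sec < b->tv_sec ? -1 : 1;
	if (a->tv_nsec != b->tv_nsec)
		return a->tv_nsec < b->tv_nsec ? -1 : 1;
	return 0;
}

static int relatime_should_update(const struct timespec *atime,
				  const struct timespec *mtime,
				  const struct timespec *ctime,
				  const struct timespec *now)
{
	if (timespec_cmp(mtime, atime) >= 0)	/* mtime caught up to atime */
		return 1;
	if (timespec_cmp(ctime, atime) >= 0)	/* ctime caught up to atime */
		return 1;
	if ((long)(now->tv_sec - atime->tv_sec) >= 24 * 60 * 60)
		return 1;			/* atime at least a day old */
	return 0;
}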
<< 1985 * << 1986 * Returns a set of S_* flags indicating whic << 1987 */ << 1988 int inode_update_timestamps(struct inode *ino << 1989 { << 1990 int updated = 0; << 1991 struct timespec64 now; << 1992 << 1993 if (flags & (S_MTIME|S_CTIME|S_VERSIO << 1994 struct timespec64 ctime = ino << 1995 struct timespec64 mtime = ino << 1996 << 1997 now = inode_set_ctime_current << 1998 if (!timespec64_equal(&now, & << 1999 updated |= S_CTIME; << 2000 if (!timespec64_equal(&now, & << 2001 inode_set_mtime_to_ts << 2002 updated |= S_MTIME; << 2003 } << 2004 if (IS_I_VERSION(inode) && in << 2005 updated |= S_VERSION; << 2006 } else { << 2007 now = current_time(inode); << 2008 } << 2009 << 2010 if (flags & S_ATIME) { << 2011 struct timespec64 atime = ino << 2012 << 2013 if (!timespec64_equal(&now, & << 2014 inode_set_atime_to_ts << 2015 updated |= S_ATIME; << 2016 } << 2017 } << 2018 return updated; << 2019 } << 2020 EXPORT_SYMBOL(inode_update_timestamps); << 2021 << 2022 /** << 2023 * generic_update_time - update the timestamp << 2024 * @inode: inode to be updated << 2025 * @flags: S_* flags that needed to be update << 2026 * << 2027 * The update_time function is called when an << 2028 * updated for a read or write operation. In << 2029 * or S_VERSION need to be updated we attempt << 2030 * updates can be handled done independently << 2031 * << 2032 * Returns a S_* mask indicating which fields << 2033 */ << 2034 int generic_update_time(struct inode *inode, << 2035 { << 2036 int updated = inode_update_timestamps << 2037 int dirty_flags = 0; << 2038 << 2039 if (updated & (S_ATIME|S_MTIME|S_CTIM << 2040 dirty_flags = inode->i_sb->s_ << 2041 if (updated & S_VERSION) << 2042 dirty_flags |= I_DIRTY_SYNC; << 2043 __mark_inode_dirty(inode, dirty_flags << 2044 return updated; << 2045 } 1495 } 2046 EXPORT_SYMBOL(generic_update_time); << 2047 1496 2048 /* 1497 /* 2049 * This does the actual work of updating an i 1498 * This does the actual work of updating an inodes time or version. Must have 2050 * had called mnt_want_write() before calling 1499 * had called mnt_want_write() before calling this. 2051 */ 1500 */ 2052 int inode_update_time(struct inode *inode, in !! 1501 static int update_time(struct inode *inode, struct timespec *time, int flags) 2053 { 1502 { 2054 if (inode->i_op->update_time) 1503 if (inode->i_op->update_time) 2055 return inode->i_op->update_ti !! 1504 return inode->i_op->update_time(inode, time, flags); 2056 generic_update_time(inode, flags); !! 1505 >> 1506 if (flags & S_ATIME) >> 1507 inode->i_atime = *time; >> 1508 if (flags & S_VERSION) >> 1509 inode_inc_iversion(inode); >> 1510 if (flags & S_CTIME) >> 1511 inode->i_ctime = *time; >> 1512 if (flags & S_MTIME) >> 1513 inode->i_mtime = *time; >> 1514 mark_inode_dirty_sync(inode); 2057 return 0; 1515 return 0; 2058 } 1516 } 2059 EXPORT_SYMBOL(inode_update_time); << 2060 1517 2061 /** 1518 /** 2062 * atime_needs_update - updat !! 1519 * touch_atime - update the access time 2063 * @path: the &struct path to update 1520 * @path: the &struct path to update 2064 * @inode: inode to update << 2065 * 1521 * 2066 * Update the accessed time on an inode 1522 * Update the accessed time on an inode and mark it for writeback. 2067 * This function automatically handles r 1523 * This function automatically handles read only file systems and media, 2068 * as well as the "noatime" flag and ino 1524 * as well as the "noatime" flag and inode specific "noatime" markers. 2069 */ 1525 */ 2070 bool atime_needs_update(const struct path *pa !! 
1526 void touch_atime(struct path *path) 2071 { 1527 { 2072 struct vfsmount *mnt = path->mnt; 1528 struct vfsmount *mnt = path->mnt; 2073 struct timespec64 now, atime; !! 1529 struct inode *inode = path->dentry->d_inode; >> 1530 struct timespec now; 2074 1531 2075 if (inode->i_flags & S_NOATIME) 1532 if (inode->i_flags & S_NOATIME) 2076 return false; !! 1533 return; 2077 << 2078 /* Atime updates will likely cause i_ << 2079 * back improprely if their true valu << 2080 */ << 2081 if (HAS_UNMAPPED_ID(mnt_idmap(mnt), i << 2082 return false; << 2083 << 2084 if (IS_NOATIME(inode)) 1534 if (IS_NOATIME(inode)) 2085 return false; !! 1535 return; 2086 if ((inode->i_sb->s_flags & SB_NODIRA !! 1536 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) 2087 return false; !! 1537 return; 2088 1538 2089 if (mnt->mnt_flags & MNT_NOATIME) 1539 if (mnt->mnt_flags & MNT_NOATIME) 2090 return false; !! 1540 return; 2091 if ((mnt->mnt_flags & MNT_NODIRATIME) 1541 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) 2092 return false; !! 1542 return; 2093 1543 2094 now = current_time(inode); !! 1544 now = current_fs_time(inode->i_sb); 2095 1545 2096 if (!relatime_need_update(mnt, inode, 1546 if (!relatime_need_update(mnt, inode, now)) 2097 return false; !! 1547 return; 2098 << 2099 atime = inode_get_atime(inode); << 2100 if (timespec64_equal(&atime, &now)) << 2101 return false; << 2102 << 2103 return true; << 2104 } << 2105 << 2106 void touch_atime(const struct path *path) << 2107 { << 2108 struct vfsmount *mnt = path->mnt; << 2109 struct inode *inode = d_inode(path->d << 2110 1548 2111 if (!atime_needs_update(path, inode)) !! 1549 if (timespec_equal(&inode->i_atime, &now)) 2112 return; 1550 return; 2113 1551 2114 if (!sb_start_write_trylock(inode->i_ 1552 if (!sb_start_write_trylock(inode->i_sb)) 2115 return; 1553 return; 2116 1554 2117 if (mnt_get_write_access(mnt) != 0) !! 1555 if (__mnt_want_write(mnt)) 2118 goto skip_update; 1556 goto skip_update; 2119 /* 1557 /* 2120 * File systems can error out when up 1558 * File systems can error out when updating inodes if they need to 2121 * allocate new space to modify an in 1559 * allocate new space to modify an inode (such is the case for 2122 * Btrfs), but since we touch atime w 1560 * Btrfs), but since we touch atime while walking down the path we 2123 * really don't care if we failed to 1561 * really don't care if we failed to update the atime of the file, 2124 * so just ignore the return value. 1562 * so just ignore the return value. 2125 * We may also fail on filesystems th 1563 * We may also fail on filesystems that have the ability to make parts 2126 * of the fs read only, e.g. subvolum 1564 * of the fs read only, e.g. subvolumes in Btrfs. 2127 */ 1565 */ 2128 inode_update_time(inode, S_ATIME); !! 1566 update_time(inode, &now, S_ATIME); 2129 mnt_put_write_access(mnt); !! 1567 __mnt_drop_write(mnt); 2130 skip_update: 1568 skip_update: 2131 sb_end_write(inode->i_sb); 1569 sb_end_write(inode->i_sb); 2132 } 1570 } 2133 EXPORT_SYMBOL(touch_atime); 1571 EXPORT_SYMBOL(touch_atime); 2134 1572 2135 /* 1573 /* 2136 * Return mask of changes for notify_change() !! 1574 * The logic we want is 2137 * response to write or truncate. Return 0 if !! 1575 * 2138 * Negative value on error (change should be !! 1576 * if suid or (sgid and xgrp) >> 1577 * remove privs 2139 */ 1578 */ 2140 int dentry_needs_remove_privs(struct mnt_idma !! 
1579 int should_remove_suid(struct dentry *dentry) 2141 struct dentry * << 2142 { 1580 { 2143 struct inode *inode = d_inode(dentry) !! 1581 umode_t mode = dentry->d_inode->i_mode; 2144 int mask = 0; !! 1582 int kill = 0; 2145 int ret; << 2146 1583 2147 if (IS_NOSEC(inode)) !! 1584 /* suid always must be killed */ 2148 return 0; !! 1585 if (unlikely(mode & S_ISUID)) >> 1586 kill = ATTR_KILL_SUID; >> 1587 >> 1588 /* >> 1589 * sgid without any exec bits is just a mandatory locking mark; leave >> 1590 * it alone. If some exec bits are set, it's a real sgid; kill it. >> 1591 */ >> 1592 if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) >> 1593 kill |= ATTR_KILL_SGID; >> 1594 >> 1595 if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) >> 1596 return kill; 2149 1597 2150 mask = setattr_should_drop_suidgid(id !! 1598 return 0; 2151 ret = security_inode_need_killpriv(de << 2152 if (ret < 0) << 2153 return ret; << 2154 if (ret) << 2155 mask |= ATTR_KILL_PRIV; << 2156 return mask; << 2157 } 1599 } >> 1600 EXPORT_SYMBOL(should_remove_suid); 2158 1601 2159 static int __remove_privs(struct mnt_idmap *i !! 1602 static int __remove_suid(struct dentry *dentry, int kill) 2160 struct dentry *dent << 2161 { 1603 { 2162 struct iattr newattrs; 1604 struct iattr newattrs; 2163 1605 2164 newattrs.ia_valid = ATTR_FORCE | kill 1606 newattrs.ia_valid = ATTR_FORCE | kill; 2165 /* !! 1607 return notify_change(dentry, &newattrs); 2166 * Note we call this on write, so not << 2167 * encounter any conflicting delegati << 2168 */ << 2169 return notify_change(idmap, dentry, & << 2170 } 1608 } 2171 1609 2172 int file_remove_privs_flags(struct file *file !! 1610 int file_remove_suid(struct file *file) 2173 { 1611 { 2174 struct dentry *dentry = file_dentry(f !! 1612 struct dentry *dentry = file->f_path.dentry; 2175 struct inode *inode = file_inode(file !! 1613 struct inode *inode = dentry->d_inode; >> 1614 int killsuid; >> 1615 int killpriv; 2176 int error = 0; 1616 int error = 0; 2177 int kill; << 2178 1617 2179 if (IS_NOSEC(inode) || !S_ISREG(inode !! 1618 /* Fast path for nothing security related */ >> 1619 if (IS_NOSEC(inode)) 2180 return 0; 1620 return 0; 2181 1621 2182 kill = dentry_needs_remove_privs(file !! 1622 killsuid = should_remove_suid(dentry); 2183 if (kill < 0) !! 1623 killpriv = security_inode_need_killpriv(dentry); 2184 return kill; << 2185 << 2186 if (kill) { << 2187 if (flags & IOCB_NOWAIT) << 2188 return -EAGAIN; << 2189 << 2190 error = __remove_privs(file_m << 2191 } << 2192 1624 >> 1625 if (killpriv < 0) >> 1626 return killpriv; >> 1627 if (killpriv) >> 1628 error = security_inode_killpriv(dentry); >> 1629 if (!error && killsuid) >> 1630 error = __remove_suid(dentry, killsuid); 2193 if (!error) 1631 if (!error) 2194 inode_has_no_xattr(inode); 1632 inode_has_no_xattr(inode); >> 1633 2195 return error; 1634 return error; 2196 } 1635 } 2197 EXPORT_SYMBOL_GPL(file_remove_privs_flags); !! 1636 EXPORT_SYMBOL(file_remove_suid); 2198 1637 2199 /** 1638 /** 2200 * file_remove_privs - remove special file pr !! 1639 * file_update_time - update mtime and ctime time 2201 * @file: file to remove privileges from !! 1640 * @file: file accessed 2202 * 1641 * 2203 * When file is modified by a write or trunca !! 1642 * Update the mtime and ctime members of an inode and mark the inode 2204 * file privileges are removed. !! 1643 * for writeback. Note that this function is meant exclusively for 2205 * !! 
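/*
 * [Editor's example] The suid/sgid kill rule from should_remove_suid()
 * above, as a stand-alone user-space function over a plain mode value.
 * The KILL_* constants are invented so the snippet compiles on its
 * own; note the kernel additionally skips the kill when the writer
 * has CAP_FSETID, which is omitted here.
 */
#include <sys/stat.h>

#define KILL_SUID	0x1	/* stand-in for ATTR_KILL_SUID */
#define KILL_SGID	0x2	/* stand-in for ATTR_KILL_SGID */

static int privs_to_kill_on_write(mode_t mode)
{
	int kill = 0;

	if (mode & S_ISUID)		/* setuid is always killed */
		kill |= KILL_SUID;
	/* sgid without group-exec is a mandatory-locking mark: leave it */
	if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
		kill |= KILL_SGID;

	return S_ISREG(mode) ? kill : 0;	/* regular files only */
}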
1644 * usage in the file write path of filesystems, and filesystems may 2206 * Return: 0 on success, negative errno on fa !! 1645 * choose to explicitly ignore update via this function with the >> 1646 * S_NOCMTIME inode flag, e.g. for network filesystem where these >> 1647 * timestamps are handled by the server. This can return an error for >> 1648 * file systems who need to allocate space in order to update an inode. 2207 */ 1649 */ 2208 int file_remove_privs(struct file *file) << 2209 { << 2210 return file_remove_privs_flags(file, << 2211 } << 2212 EXPORT_SYMBOL(file_remove_privs); << 2213 1650 2214 static int inode_needs_update_time(struct ino !! 1651 int file_update_time(struct file *file) 2215 { 1652 { >> 1653 struct inode *inode = file_inode(file); >> 1654 struct timespec now; 2216 int sync_it = 0; 1655 int sync_it = 0; 2217 struct timespec64 now = current_time( !! 1656 int ret; 2218 struct timespec64 ts; << 2219 1657 2220 /* First try to exhaust all avenues t 1658 /* First try to exhaust all avenues to not sync */ 2221 if (IS_NOCMTIME(inode)) 1659 if (IS_NOCMTIME(inode)) 2222 return 0; 1660 return 0; 2223 1661 2224 ts = inode_get_mtime(inode); !! 1662 now = current_fs_time(inode->i_sb); 2225 if (!timespec64_equal(&ts, &now)) !! 1663 if (!timespec_equal(&inode->i_mtime, &now)) 2226 sync_it = S_MTIME; 1664 sync_it = S_MTIME; 2227 1665 2228 ts = inode_get_ctime(inode); !! 1666 if (!timespec_equal(&inode->i_ctime, &now)) 2229 if (!timespec64_equal(&ts, &now)) << 2230 sync_it |= S_CTIME; 1667 sync_it |= S_CTIME; 2231 1668 2232 if (IS_I_VERSION(inode) && inode_iver !! 1669 if (IS_I_VERSION(inode)) 2233 sync_it |= S_VERSION; 1670 sync_it |= S_VERSION; 2234 1671 2235 return sync_it; !! 1672 if (!sync_it) 2236 } !! 1673 return 0; 2237 1674 2238 static int __file_update_time(struct file *fi !! 1675 /* Finally allowed to write? Takes lock. */ 2239 { !! 1676 if (__mnt_want_write_file(file)) 2240 int ret = 0; !! 1677 return 0; 2241 struct inode *inode = file_inode(file << 2242 1678 2243 /* try to update time settings */ !! 1679 ret = update_time(inode, &now, sync_it); 2244 if (!mnt_get_write_access_file(file)) !! 1680 __mnt_drop_write_file(file); 2245 ret = inode_update_time(inode << 2246 mnt_put_write_access_file(fil << 2247 } << 2248 1681 2249 return ret; 1682 return ret; 2250 } 1683 } 2251 << 2252 /** << 2253 * file_update_time - update mtime and ctime << 2254 * @file: file accessed << 2255 * << 2256 * Update the mtime and ctime members of an i << 2257 * writeback. Note that this function is mean << 2258 * the file write path of filesystems, and fi << 2259 * explicitly ignore updates via this functio << 2260 * flag, e.g. for network filesystem where th << 2261 * by the server. 
This can return an error fo << 2262 * allocate space in order to update an inode << 2263 * << 2264 * Return: 0 on success, negative errno on fa << 2265 */ << 2266 int file_update_time(struct file *file) << 2267 { << 2268 int ret; << 2269 struct inode *inode = file_inode(file << 2270 << 2271 ret = inode_needs_update_time(inode); << 2272 if (ret <= 0) << 2273 return ret; << 2274 << 2275 return __file_update_time(file, ret); << 2276 } << 2277 EXPORT_SYMBOL(file_update_time); 1684 EXPORT_SYMBOL(file_update_time); 2278 1685 2279 /** << 2280 * file_modified_flags - handle mandated vfs << 2281 * @file: file that was modified << 2282 * @flags: kiocb flags << 2283 * << 2284 * When file has been modified ensure that sp << 2285 * file privileges are removed and time setti << 2286 * << 2287 * If IOCB_NOWAIT is set, special file privil << 2288 * time settings will not be updated. It will << 2289 * << 2290 * Context: Caller must hold the file's inode << 2291 * << 2292 * Return: 0 on success, negative errno on fa << 2293 */ << 2294 static int file_modified_flags(struct file *f << 2295 { << 2296 int ret; << 2297 struct inode *inode = file_inode(file << 2298 << 2299 /* << 2300 * Clear the security bits if the pro << 2301 * This keeps people from modifying s << 2302 */ << 2303 ret = file_remove_privs_flags(file, f << 2304 if (ret) << 2305 return ret; << 2306 << 2307 if (unlikely(file->f_mode & FMODE_NOC << 2308 return 0; << 2309 << 2310 ret = inode_needs_update_time(inode); << 2311 if (ret <= 0) << 2312 return ret; << 2313 if (flags & IOCB_NOWAIT) << 2314 return -EAGAIN; << 2315 << 2316 return __file_update_time(file, ret); << 2317 } << 2318 << 2319 /** << 2320 * file_modified - handle mandated vfs change << 2321 * @file: file that was modified << 2322 * << 2323 * When file has been modified ensure that sp << 2324 * file privileges are removed and time setti << 2325 * << 2326 * Context: Caller must hold the file's inode << 2327 * << 2328 * Return: 0 on success, negative errno on fa << 2329 */ << 2330 int file_modified(struct file *file) << 2331 { << 2332 return file_modified_flags(file, 0); << 2333 } << 2334 EXPORT_SYMBOL(file_modified); << 2335 << 2336 /** << 2337 * kiocb_modified - handle mandated vfs chang << 2338 * @iocb: iocb that was modified << 2339 * << 2340 * When file has been modified ensure that sp << 2341 * file privileges are removed and time setti << 2342 * << 2343 * Context: Caller must hold the file's inode << 2344 * << 2345 * Return: 0 on success, negative errno on fa << 2346 */ << 2347 int kiocb_modified(struct kiocb *iocb) << 2348 { << 2349 return file_modified_flags(iocb->ki_f << 2350 } << 2351 EXPORT_SYMBOL_GPL(kiocb_modified); << 2352 << 2353 int inode_needs_sync(struct inode *inode) 1686 int inode_needs_sync(struct inode *inode) 2354 { 1687 { 2355 if (IS_SYNC(inode)) 1688 if (IS_SYNC(inode)) 2356 return 1; 1689 return 1; 2357 if (S_ISDIR(inode->i_mode) && IS_DIRS 1690 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) 2358 return 1; 1691 return 1; 2359 return 0; 1692 return 0; 2360 } 1693 } 2361 EXPORT_SYMBOL(inode_needs_sync); 1694 EXPORT_SYMBOL(inode_needs_sync); 2362 1695 >> 1696 int inode_wait(void *word) >> 1697 { >> 1698 schedule(); >> 1699 return 0; >> 1700 } >> 1701 EXPORT_SYMBOL(inode_wait); >> 1702 2363 /* 1703 /* 2364 * If we try to find an inode in the inode ha 1704 * If we try to find an inode in the inode hash while it is being 2365 * deleted, we have to wait until the filesys 1705 * deleted, we have to wait until the filesystem completes its 2366 * deletion before 
reporting that it isn't fo 1706 * deletion before reporting that it isn't found. This function waits 2367 * until the deletion _might_ have completed. 1707 * until the deletion _might_ have completed. Callers are responsible 2368 * to recheck inode state. 1708 * to recheck inode state. 2369 * 1709 * 2370 * It doesn't matter if I_NEW is not set init 1710 * It doesn't matter if I_NEW is not set initially, a call to 2371 * wake_up_bit(&inode->i_state, __I_NEW) afte 1711 * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list 2372 * will DTRT. 1712 * will DTRT. 2373 */ 1713 */ 2374 static void __wait_on_freeing_inode(struct in !! 1714 static void __wait_on_freeing_inode(struct inode *inode) 2375 { 1715 { 2376 struct wait_bit_queue_entry wqe; !! 1716 wait_queue_head_t *wq; 2377 struct wait_queue_head *wq_head; !! 1717 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); 2378 !! 1718 wq = bit_waitqueue(&inode->i_state, __I_NEW); 2379 /* !! 1719 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 2380 * Handle racing against evict(), see << 2381 */ << 2382 if (unlikely(inode_unhashed(inode))) << 2383 WARN_ON(is_inode_hash_locked) << 2384 spin_unlock(&inode->i_lock); << 2385 return; << 2386 } << 2387 << 2388 wq_head = inode_bit_waitqueue(&wqe, i << 2389 prepare_to_wait_event(wq_head, &wqe.w << 2390 spin_unlock(&inode->i_lock); 1720 spin_unlock(&inode->i_lock); 2391 rcu_read_unlock(); !! 1721 spin_unlock(&inode_hash_lock); 2392 if (is_inode_hash_locked) << 2393 spin_unlock(&inode_hash_lock) << 2394 schedule(); 1722 schedule(); 2395 finish_wait(wq_head, &wqe.wq_entry); !! 1723 finish_wait(wq, &wait.wait); 2396 if (is_inode_hash_locked) !! 1724 spin_lock(&inode_hash_lock); 2397 spin_lock(&inode_hash_lock); << 2398 rcu_read_lock(); << 2399 } 1725 } 2400 1726 2401 static __initdata unsigned long ihash_entries 1727 static __initdata unsigned long ihash_entries; 2402 static int __init set_ihash_entries(char *str 1728 static int __init set_ihash_entries(char *str) 2403 { 1729 { 2404 if (!str) 1730 if (!str) 2405 return 0; 1731 return 0; 2406 ihash_entries = simple_strtoul(str, & 1732 ihash_entries = simple_strtoul(str, &str, 0); 2407 return 1; 1733 return 1; 2408 } 1734 } 2409 __setup("ihash_entries=", set_ihash_entries); 1735 __setup("ihash_entries=", set_ihash_entries); 2410 1736 2411 /* 1737 /* 2412 * Initialize the waitqueues and inode hash t 1738 * Initialize the waitqueues and inode hash table. 2413 */ 1739 */ 2414 void __init inode_init_early(void) 1740 void __init inode_init_early(void) 2415 { 1741 { >> 1742 unsigned int loop; >> 1743 2416 /* If hashes are distributed across N 1744 /* If hashes are distributed across NUMA nodes, defer 2417 * hash allocation until vmalloc spac 1745 * hash allocation until vmalloc space is available. 2418 */ 1746 */ 2419 if (hashdist) 1747 if (hashdist) 2420 return; 1748 return; 2421 1749 2422 inode_hashtable = 1750 inode_hashtable = 2423 alloc_large_system_hash("Inod 1751 alloc_large_system_hash("Inode-cache", 2424 sizeo 1752 sizeof(struct hlist_head), 2425 ihash 1753 ihash_entries, 2426 14, 1754 14, 2427 HASH_ !! 
/*
 * Initialize the waitqueues and inode hash table.
 */
void __init inode_init_early(void)
{
	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					HASH_EARLY | HASH_ZERO,
					&i_hash_shift,
					&i_hash_mask,
					0,
					0);
}

void __init inode_init(void)
{
	/* inode slab cache */
	inode_cachep = kmem_cache_create("inode_cache",
					 sizeof(struct inode),
					 0,
					 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
					 SLAB_ACCOUNT),
					 init_once);

	/* Hash may have been set up in inode_init_early */
	if (!hashdist)
		return;

	inode_hashtable =
		alloc_large_system_hash("Inode-cache",
					sizeof(struct hlist_head),
					ihash_entries,
					14,
					HASH_ZERO,
					&i_hash_shift,
					&i_hash_mask,
					0,
					0);
}

void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
	inode->i_mode = mode;
	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = rdev;
	} else if (S_ISBLK(mode)) {
		if (IS_ENABLED(CONFIG_BLOCK))
			inode->i_fop = &def_blk_fops;
		inode->i_rdev = rdev;
	} else if (S_ISFIFO(mode))
		inode->i_fop = &pipefifo_fops;
	else if (S_ISSOCK(mode))
		;	/* leave it no_open_fops */
	else
		printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
				  " inode %s:%lu\n", mode, inode->i_sb->s_id,
				  inode->i_ino);
}
EXPORT_SYMBOL(init_special_inode);

/**
 * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
 * @idmap: idmap of the mount the inode was created from
 * @inode: New inode
 * @dir: Directory inode
 * @mode: mode of the new inode
 *
 * If the inode has been created through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions
 * and initializing i_uid and i_gid. On non-idmapped mounts or if permission
 * checking is to be performed on the raw inode simply pass @nop_mnt_idmap.
 */
void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
		      const struct inode *dir, umode_t mode)
{
	inode_fsuid_set(inode, idmap);
	if (dir && dir->i_mode & S_ISGID) {
		inode->i_gid = dir->i_gid;

		/* Directories are special, and always inherit S_ISGID */
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else
		inode_fsgid_set(inode, idmap);
	inode->i_mode = mode;
}
EXPORT_SYMBOL(inode_init_owner);
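
/*
 * Illustrative sketch (not part of this file): a filesystem's ->create
 * typically lets inode_init_owner() derive i_uid/i_gid and the final
 * mode, which transparently honours setgid directories and idmapped
 * mounts. The "myfs_" name is hypothetical and most inode setup
 * (i_op, i_fop, quota, on-disk allocation) is omitted.
 */
#if 0	/* example only */
static int myfs_create(struct mnt_idmap *idmap, struct inode *dir,
		       struct dentry *dentry, umode_t mode, bool excl)
{
	struct inode *inode = new_inode(dir->i_sb);

	if (!inode)
		return -ENOMEM;
	inode->i_ino = get_next_ino();
	inode_init_owner(idmap, inode, dir, mode);
	simple_inode_init_ts(inode);
	d_instantiate(dentry, inode);
	return 0;
}
#endif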
/**
 * inode_owner_or_capable - check current task permissions to inode
 * @idmap: idmap of the mount the inode was found from
 * @inode: inode being checked
 *
 * Return true if current either has CAP_FOWNER in a namespace with the
 * inode owner uid mapped, or owns the file.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
bool inode_owner_or_capable(struct mnt_idmap *idmap,
			    const struct inode *inode)
{
	vfsuid_t vfsuid;
	struct user_namespace *ns;

	vfsuid = i_uid_into_vfsuid(idmap, inode);
	if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
		return true;

	ns = current_user_ns();
	if (vfsuid_has_mapping(ns, vfsuid) && ns_capable(ns, CAP_FOWNER))
		return true;
	return false;
}
EXPORT_SYMBOL(inode_owner_or_capable);

/*
 * Direct i/o helper functions
 */
bool inode_dio_finished(const struct inode *inode)
{
	return atomic_read(&inode->i_dio_count) == 0;
}
EXPORT_SYMBOL(inode_dio_finished);

/**
 * inode_dio_wait - wait for outstanding DIO requests to finish
 * @inode: inode to wait for
 *
 * Waits for all pending direct I/O requests to finish so that we can
 * proceed with a truncate or equivalent operation.
 *
 * Must be called under a lock that serializes taking new references
 * to i_dio_count, usually by inode->i_mutex.
 */
void inode_dio_wait(struct inode *inode)
{
	wait_var_event(&inode->i_dio_count, inode_dio_finished(inode));
}
EXPORT_SYMBOL(inode_dio_wait);

void inode_dio_wait_interruptible(struct inode *inode)
{
	wait_var_event_interruptible(&inode->i_dio_count,
				     inode_dio_finished(inode));
}
EXPORT_SYMBOL(inode_dio_wait_interruptible);
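
/*
 * Illustrative sketch (not part of this file): direct I/O submitters
 * pair inode_dio_begin()/inode_dio_end() around each request, and a
 * truncate-like path quiesces them before changing i_size. The
 * "myfs_" name is hypothetical.
 */
#if 0	/* example only */
static void myfs_truncate_prepare(struct inode *inode)
{
	inode_lock(inode);	/* stops new i_dio_count references */
	inode_dio_wait(inode);	/* sleeps until i_dio_count drops to zero */
	/* ... now safe to shrink the file ... */
	inode_unlock(inode);
}
#endif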
/*
 * inode_set_flags - atomically set some inode flags
 *
 * Note: the caller should be holding i_mutex, or else be sure that
 * they have exclusive access to the inode structure (i.e., while the
 * inode is being instantiated).  The reason for the cmpxchg() loop
 * --- which wouldn't be necessary if all code paths which modify
 * i_flags actually followed this rule, is that there is at least one
 * code path which doesn't today so we use cmpxchg() out of an abundance
 * of caution.
 *
 * In the long run, i_mutex is overkill, and we should probably look
 * at using the i_lock spinlock to protect i_flags, and then make sure
 * it is so documented in include/linux/fs.h and that all code follows
 * the locking convention!!
 */
void inode_set_flags(struct inode *inode, unsigned int flags,
		     unsigned int mask)
{
	WARN_ON_ONCE(flags & ~mask);
	set_mask_bits(&inode->i_flags, mask, flags);
}
EXPORT_SYMBOL(inode_set_flags);

void inode_nohighmem(struct inode *inode)
{
	mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
}
EXPORT_SYMBOL(inode_nohighmem);

/**
 * timestamp_truncate - Truncate timespec to a granularity
 * @t: Timespec
 * @inode: inode being updated
 *
 * Truncate a timespec to the granularity supported by the fs
 * containing the inode. Always rounds down. gran must
 * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
 */
struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	unsigned int gran = sb->s_time_gran;

	t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
	if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
		t.tv_nsec = 0;

	/* Avoid division in the common cases 1 ns and 1 s. */
	if (gran == 1)
		; /* nothing */
	else if (gran == NSEC_PER_SEC)
		t.tv_nsec = 0;
	else if (gran > 1 && gran < NSEC_PER_SEC)
		t.tv_nsec -= t.tv_nsec % gran;
	else
		WARN(1, "invalid file time granularity: %u", gran);
	return t;
}
EXPORT_SYMBOL(timestamp_truncate);
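
/*
 * Worked example (values are illustrative): with s_time_gran == 1000
 * (microsecond resolution), tv_nsec = 123456789 is rounded down to
 * 123456000, since 123456789 % 1000 == 789.
 */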
/**
 * current_time - Return FS time
 * @inode: inode.
 *
 * Return the current time truncated to the time granularity supported by
 * the fs.
 *
 * Note that inode and inode->sb cannot be NULL.
 * Otherwise, the function warns and returns time without truncation.
 */
struct timespec64 current_time(struct inode *inode)
{
	struct timespec64 now;

	ktime_get_coarse_real_ts64(&now);
	return timestamp_truncate(now, inode);
}
EXPORT_SYMBOL(current_time);

/**
 * inode_set_ctime_current - set the ctime to current_time
 * @inode: inode
 *
 * Set the inode->i_ctime to the current value for the inode. Returns
 * the current value that was assigned to i_ctime.
 */
struct timespec64 inode_set_ctime_current(struct inode *inode)
{
	struct timespec64 now = current_time(inode);

	inode_set_ctime_to_ts(inode, now);
	return now;
}
EXPORT_SYMBOL(inode_set_ctime_current);

/**
 * in_group_or_capable - check whether caller is CAP_FSETID privileged
 * @idmap:	idmap of the mount @inode was found from
 * @inode:	inode to check
 * @vfsgid:	the new/current vfsgid of @inode
 *
 * Check whether @vfsgid is in the caller's group list or if the caller is
 * privileged with CAP_FSETID over @inode. This can be used to determine
 * whether the setgid bit can be kept or must be dropped.
 *
 * Return: true if the caller is sufficiently privileged, false if not.
 */
bool in_group_or_capable(struct mnt_idmap *idmap,
			 const struct inode *inode, vfsgid_t vfsgid)
{
	if (vfsgid_in_group_p(vfsgid))
		return true;
	if (capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
		return true;
	return false;
}
EXPORT_SYMBOL(in_group_or_capable);

/**
 * mode_strip_sgid - handle the sgid bit for non-directories
 * @idmap: idmap of the mount the inode was created from
 * @dir: parent directory inode
 * @mode: mode of the file to be created in @dir
 *
 * If the @mode of the new file has both the S_ISGID and the S_IXGRP bit
 * raised and @dir has the S_ISGID bit raised ensure that the caller is
 * either in the group of the parent directory or they have CAP_FSETID
 * in their user namespace and are privileged over the parent directory.
 * In all other cases, strip the S_ISGID bit from @mode.
 *
 * Return: the new mode to use for the file
 */
umode_t mode_strip_sgid(struct mnt_idmap *idmap,
			const struct inode *dir, umode_t mode)
{
	if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
		return mode;
	if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
		return mode;
	if (in_group_or_capable(idmap, dir, i_gid_into_vfsgid(idmap, dir)))
		return mode;
	return mode & ~S_ISGID;
}
EXPORT_SYMBOL(mode_strip_sgid);
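
/*
 * Worked example (modes are illustrative): creating a file with mode
 * 02770 ("rwxrws---") in a setgid directory keeps S_ISGID only if the
 * caller is in the directory's group or has CAP_FSETID over it;
 * otherwise mode_strip_sgid() returns 0770. A plain 0644 file is
 * returned unchanged because S_IXGRP is not set.
 */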