1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * The NFSD open file cache. 4 * 5 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com> 6 * 7 * An nfsd_file object is a per-file collection of open state that binds 8 * together: 9 * - a struct file * 10 * - a user credential 11 * - a network namespace 12 * - a read-ahead context 13 * - monitoring for writeback errors 14 * 15 * nfsd_file objects are reference-counted. Consumers acquire a new 16 * object via the nfsd_file_acquire API. They manage their interest in 17 * the acquired object, and hence the object's reference count, via 18 * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file 19 * object: 20 * 21 * * non-garbage-collected: When a consumer wants to precisely control 22 * the lifetime of a file's open state, it acquires a non-garbage- 23 * collected nfsd_file. The final nfsd_file_put releases the open 24 * state immediately. 25 * 26 * * garbage-collected: When a consumer does not control the lifetime 27 * of open state, it acquires a garbage-collected nfsd_file. The 28 * final nfsd_file_put allows the open state to linger for a period 29 * during which it may be re-used. 30 */ 31 32 #include <linux/hash.h> 33 #include <linux/slab.h> 34 #include <linux/file.h> 35 #include <linux/pagemap.h> 36 #include <linux/sched.h> 37 #include <linux/list_lru.h> 38 #include <linux/fsnotify_backend.h> 39 #include <linux/fsnotify.h> 40 #include <linux/seq_file.h> 41 #include <linux/rhashtable.h> 42 43 #include "vfs.h" 44 #include "nfsd.h" 45 #include "nfsfh.h" 46 #include "netns.h" 47 #include "filecache.h" 48 #include "trace.h" 49 50 #define NFSD_LAUNDRETTE_DELAY (2 * HZ) 51 52 #define NFSD_FILE_CACHE_UP (0) 53 54 /* We only care about NFSD_MAY_READ/WRITE for this cache */ 55 #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) 56 57 static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); 58 static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); 59 static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); 60 static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); 61 static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); 62 63 struct nfsd_fcache_disposal { 64 spinlock_t lock; 65 struct list_head freeme; 66 }; 67 68 static struct kmem_cache *nfsd_file_slab; 69 static struct kmem_cache *nfsd_file_mark_slab; 70 static struct list_lru nfsd_file_lru; 71 static unsigned long nfsd_file_flags; 72 static struct fsnotify_group *nfsd_file_fsnotify_group; 73 static struct delayed_work nfsd_filecache_laundrette; 74 static struct rhltable nfsd_file_rhltable 75 ____cacheline_aligned_in_smp; 76 77 static bool 78 nfsd_match_cred(const struct cred *c1, const struct cred *c2) 79 { 80 int i; 81 82 if (!uid_eq(c1->fsuid, c2->fsuid)) 83 return false; 84 if (!gid_eq(c1->fsgid, c2->fsgid)) 85 return false; 86 if (c1->group_info == NULL || c2->group_info == NULL) 87 return c1->group_info == c2->group_info; 88 if (c1->group_info->ngroups != c2->group_info->ngroups) 89 return false; 90 for (i = 0; i < c1->group_info->ngroups; i++) { 91 if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) 92 return false; 93 } 94 return true; 95 } 96 97 static const struct rhashtable_params nfsd_file_rhash_params = { 98 .key_len = sizeof_field(struct nfsd_file, nf_inode), 99 .key_offset = offsetof(struct nfsd_file, nf_inode), 100 .head_offset = offsetof(struct nfsd_file, nf_rlist), 101 102 /* 103 * Start with a single page hash table to reduce resizing churn 104 * on light workloads. 105 */ 106 .min_size = 256, 107 .automatic_shrinking = true, 108 }; 109 110 static void 111 nfsd_file_schedule_laundrette(void) 112 { 113 if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) 114 queue_delayed_work(system_wq, &nfsd_filecache_laundrette, 115 NFSD_LAUNDRETTE_DELAY); 116 } 117 118 static void 119 nfsd_file_slab_free(struct rcu_head *rcu) 120 { 121 struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu); 122 123 put_cred(nf->nf_cred); 124 kmem_cache_free(nfsd_file_slab, nf); 125 } 126 127 static void 128 nfsd_file_mark_free(struct fsnotify_mark *mark) 129 { 130 struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark, 131 nfm_mark); 132 133 kmem_cache_free(nfsd_file_mark_slab, nfm); 134 } 135 136 static struct nfsd_file_mark * 137 nfsd_file_mark_get(struct nfsd_file_mark *nfm) 138 { 139 if (!refcount_inc_not_zero(&nfm->nfm_ref)) 140 return NULL; 141 return nfm; 142 } 143 144 static void 145 nfsd_file_mark_put(struct nfsd_file_mark *nfm) 146 { 147 if (refcount_dec_and_test(&nfm->nfm_ref)) { 148 fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group); 149 fsnotify_put_mark(&nfm->nfm_mark); 150 } 151 } 152 153 static struct nfsd_file_mark * 154 nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) 155 { 156 int err; 157 struct fsnotify_mark *mark; 158 struct nfsd_file_mark *nfm = NULL, *new; 159 160 do { 161 fsnotify_group_lock(nfsd_file_fsnotify_group); 162 mark = fsnotify_find_inode_mark(inode, 163 nfsd_file_fsnotify_group); 164 if (mark) { 165 nfm = nfsd_file_mark_get(container_of(mark, 166 struct nfsd_file_mark, 167 nfm_mark)); 168 fsnotify_group_unlock(nfsd_file_fsnotify_group); 169 if (nfm) { 170 fsnotify_put_mark(mark); 171 break; 172 } 173 /* Avoid soft lockup race with nfsd_file_mark_put() */ 174 fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); 175 fsnotify_put_mark(mark); 176 } else { 177 fsnotify_group_unlock(nfsd_file_fsnotify_group); 178 } 179 180 /* allocate a new nfm */ 181 new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); 182 if (!new) 183 return NULL; 184 fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group); 185 new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF; 186 refcount_set(&new->nfm_ref, 1); 187 188 err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0); 189 190 /* 191 * If the add was successful, then return the object. 192 * Otherwise, we need to put the reference we hold on the 193 * nfm_mark. The fsnotify code will take a reference and put 194 * it on failure, so we can't just free it directly. It's also 195 * not safe to call fsnotify_destroy_mark on it as the 196 * mark->group will be NULL. Thus, we can't let the nfm_ref 197 * counter drive the destruction at this point. 198 */ 199 if (likely(!err)) 200 nfm = new; 201 else 202 fsnotify_put_mark(&new->nfm_mark); 203 } while (unlikely(err == -EEXIST)); 204 205 return nfm; 206 } 207 208 static struct nfsd_file * 209 nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, 210 bool want_gc) 211 { 212 struct nfsd_file *nf; 213 214 nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); 215 if (unlikely(!nf)) 216 return NULL; 217 218 INIT_LIST_HEAD(&nf->nf_lru); 219 nf->nf_birthtime = ktime_get(); 220 nf->nf_file = NULL; 221 nf->nf_cred = get_current_cred(); 222 nf->nf_net = net; 223 nf->nf_flags = want_gc ? 224 BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) : 225 BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING); 226 nf->nf_inode = inode; 227 refcount_set(&nf->nf_ref, 1); 228 nf->nf_may = need; 229 nf->nf_mark = NULL; 230 return nf; 231 } 232 233 /** 234 * nfsd_file_check_write_error - check for writeback errors on a file 235 * @nf: nfsd_file to check for writeback errors 236 * 237 * Check whether a nfsd_file has an unseen error. Reset the write 238 * verifier if so. 239 */ 240 static void 241 nfsd_file_check_write_error(struct nfsd_file *nf) 242 { 243 struct file *file = nf->nf_file; 244 245 if ((file->f_mode & FMODE_WRITE) && 246 filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err))) 247 nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); 248 } 249 250 static void 251 nfsd_file_hash_remove(struct nfsd_file *nf) 252 { 253 trace_nfsd_file_unhash(nf); 254 rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist, 255 nfsd_file_rhash_params); 256 } 257 258 static bool 259 nfsd_file_unhash(struct nfsd_file *nf) 260 { 261 if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 262 nfsd_file_hash_remove(nf); 263 return true; 264 } 265 return false; 266 } 267 268 static void 269 nfsd_file_free(struct nfsd_file *nf) 270 { 271 s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); 272 273 trace_nfsd_file_free(nf); 274 275 this_cpu_inc(nfsd_file_releases); 276 this_cpu_add(nfsd_file_total_age, age); 277 278 nfsd_file_unhash(nf); 279 if (nf->nf_mark) 280 nfsd_file_mark_put(nf->nf_mark); 281 if (nf->nf_file) { 282 nfsd_file_check_write_error(nf); 283 nfsd_filp_close(nf->nf_file); 284 } 285 286 /* 287 * If this item is still linked via nf_lru, that's a bug. 288 * WARN and leak it to preserve system stability. 289 */ 290 if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) 291 return; 292 293 call_rcu(&nf->nf_rcu, nfsd_file_slab_free); 294 } 295 296 static bool 297 nfsd_file_check_writeback(struct nfsd_file *nf) 298 { 299 struct file *file = nf->nf_file; 300 struct address_space *mapping; 301 302 /* File not open for write? */ 303 if (!(file->f_mode & FMODE_WRITE)) 304 return false; 305 306 /* 307 * Some filesystems (e.g. NFS) flush all dirty data on close. 308 * On others, there is no need to wait for writeback. 309 */ 310 if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE)) 311 return false; 312 313 mapping = file->f_mapping; 314 return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || 315 mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); 316 } 317 318 319 static bool nfsd_file_lru_add(struct nfsd_file *nf) 320 { 321 set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); 322 if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru)) { 323 trace_nfsd_file_lru_add(nf); 324 return true; 325 } 326 return false; 327 } 328 329 static bool nfsd_file_lru_remove(struct nfsd_file *nf) 330 { 331 if (list_lru_del_obj(&nfsd_file_lru, &nf->nf_lru)) { 332 trace_nfsd_file_lru_del(nf); 333 return true; 334 } 335 return false; 336 } 337 338 struct nfsd_file * 339 nfsd_file_get(struct nfsd_file *nf) 340 { 341 if (nf && refcount_inc_not_zero(&nf->nf_ref)) 342 return nf; 343 return NULL; 344 } 345 346 /** 347 * nfsd_file_put - put the reference to a nfsd_file 348 * @nf: nfsd_file of which to put the reference 349 * 350 * Put a reference to a nfsd_file. In the non-GC case, we just put the 351 * reference immediately. In the GC case, if the reference would be 352 * the last one, the put it on the LRU instead to be cleaned up later. 353 */ 354 void 355 nfsd_file_put(struct nfsd_file *nf) 356 { 357 might_sleep(); 358 trace_nfsd_file_put(nf); 359 360 if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && 361 test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 362 /* 363 * If this is the last reference (nf_ref == 1), then try to 364 * transfer it to the LRU. 365 */ 366 if (refcount_dec_not_one(&nf->nf_ref)) 367 return; 368 369 /* Try to add it to the LRU. If that fails, decrement. */ 370 if (nfsd_file_lru_add(nf)) { 371 /* If it's still hashed, we're done */ 372 if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 373 nfsd_file_schedule_laundrette(); 374 return; 375 } 376 377 /* 378 * We're racing with unhashing, so try to remove it from 379 * the LRU. If removal fails, then someone else already 380 * has our reference. 381 */ 382 if (!nfsd_file_lru_remove(nf)) 383 return; 384 } 385 } 386 if (refcount_dec_and_test(&nf->nf_ref)) 387 nfsd_file_free(nf); 388 } 389 390 static void 391 nfsd_file_dispose_list(struct list_head *dispose) 392 { 393 struct nfsd_file *nf; 394 395 while (!list_empty(dispose)) { 396 nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 397 list_del_init(&nf->nf_lru); 398 nfsd_file_free(nf); 399 } 400 } 401 402 /** 403 * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list 404 * @dispose: list of nfsd_files to be disposed 405 * 406 * Transfers each file to the "freeme" list for its nfsd_net, to eventually 407 * be disposed of by the per-net garbage collector. 408 */ 409 static void 410 nfsd_file_dispose_list_delayed(struct list_head *dispose) 411 { 412 while(!list_empty(dispose)) { 413 struct nfsd_file *nf = list_first_entry(dispose, 414 struct nfsd_file, nf_lru); 415 struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); 416 struct nfsd_fcache_disposal *l = nn->fcache_disposal; 417 418 spin_lock(&l->lock); 419 list_move_tail(&nf->nf_lru, &l->freeme); 420 spin_unlock(&l->lock); 421 svc_wake_up(nn->nfsd_serv); 422 } 423 } 424 425 /** 426 * nfsd_file_net_dispose - deal with nfsd_files waiting to be disposed. 427 * @nn: nfsd_net in which to find files to be disposed. 428 * 429 * When files held open for nfsv3 are removed from the filecache, whether 430 * due to memory pressure or garbage collection, they are queued to 431 * a per-net-ns queue. This function completes the disposal, either 432 * directly or by waking another nfsd thread to help with the work. 433 */ 434 void nfsd_file_net_dispose(struct nfsd_net *nn) 435 { 436 struct nfsd_fcache_disposal *l = nn->fcache_disposal; 437 438 if (!list_empty(&l->freeme)) { 439 LIST_HEAD(dispose); 440 int i; 441 442 spin_lock(&l->lock); 443 for (i = 0; i < 8 && !list_empty(&l->freeme); i++) 444 list_move(l->freeme.next, &dispose); 445 spin_unlock(&l->lock); 446 if (!list_empty(&l->freeme)) 447 /* Wake up another thread to share the work 448 * *before* doing any actual disposing. 449 */ 450 svc_wake_up(nn->nfsd_serv); 451 nfsd_file_dispose_list(&dispose); 452 } 453 } 454 455 /** 456 * nfsd_file_lru_cb - Examine an entry on the LRU list 457 * @item: LRU entry to examine 458 * @lru: controlling LRU 459 * @lock: LRU list lock (unused) 460 * @arg: dispose list 461 * 462 * Return values: 463 * %LRU_REMOVED: @item was removed from the LRU 464 * %LRU_ROTATE: @item is to be moved to the LRU tail 465 * %LRU_SKIP: @item cannot be evicted 466 */ 467 static enum lru_status 468 nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, 469 spinlock_t *lock, void *arg) 470 __releases(lock) 471 __acquires(lock) 472 { 473 struct list_head *head = arg; 474 struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); 475 476 /* We should only be dealing with GC entries here */ 477 WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags)); 478 479 /* 480 * Don't throw out files that are still undergoing I/O or 481 * that have uncleared errors pending. 482 */ 483 if (nfsd_file_check_writeback(nf)) { 484 trace_nfsd_file_gc_writeback(nf); 485 return LRU_SKIP; 486 } 487 488 /* If it was recently added to the list, skip it */ 489 if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { 490 trace_nfsd_file_gc_referenced(nf); 491 return LRU_ROTATE; 492 } 493 494 /* 495 * Put the reference held on behalf of the LRU. If it wasn't the last 496 * one, then just remove it from the LRU and ignore it. 497 */ 498 if (!refcount_dec_and_test(&nf->nf_ref)) { 499 trace_nfsd_file_gc_in_use(nf); 500 list_lru_isolate(lru, &nf->nf_lru); 501 return LRU_REMOVED; 502 } 503 504 /* Refcount went to zero. Unhash it and queue it to the dispose list */ 505 nfsd_file_unhash(nf); 506 list_lru_isolate_move(lru, &nf->nf_lru, head); 507 this_cpu_inc(nfsd_file_evictions); 508 trace_nfsd_file_gc_disposed(nf); 509 return LRU_REMOVED; 510 } 511 512 static void 513 nfsd_file_gc(void) 514 { 515 LIST_HEAD(dispose); 516 unsigned long ret; 517 518 ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, 519 &dispose, list_lru_count(&nfsd_file_lru)); 520 trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); 521 nfsd_file_dispose_list_delayed(&dispose); 522 } 523 524 static void 525 nfsd_file_gc_worker(struct work_struct *work) 526 { 527 nfsd_file_gc(); 528 if (list_lru_count(&nfsd_file_lru)) 529 nfsd_file_schedule_laundrette(); 530 } 531 532 static unsigned long 533 nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) 534 { 535 return list_lru_count(&nfsd_file_lru); 536 } 537 538 static unsigned long 539 nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) 540 { 541 LIST_HEAD(dispose); 542 unsigned long ret; 543 544 ret = list_lru_shrink_walk(&nfsd_file_lru, sc, 545 nfsd_file_lru_cb, &dispose); 546 trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); 547 nfsd_file_dispose_list_delayed(&dispose); 548 return ret; 549 } 550 551 static struct shrinker *nfsd_file_shrinker; 552 553 /** 554 * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file 555 * @nf: nfsd_file to attempt to queue 556 * @dispose: private list to queue successfully-put objects 557 * 558 * Unhash an nfsd_file, try to get a reference to it, and then put that 559 * reference. If it's the last reference, queue it to the dispose list. 560 */ 561 static void 562 nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) 563 __must_hold(RCU) 564 { 565 int decrement = 1; 566 567 /* If we raced with someone else unhashing, ignore it */ 568 if (!nfsd_file_unhash(nf)) 569 return; 570 571 /* If we can't get a reference, ignore it */ 572 if (!nfsd_file_get(nf)) 573 return; 574 575 /* Extra decrement if we remove from the LRU */ 576 if (nfsd_file_lru_remove(nf)) 577 ++decrement; 578 579 /* If refcount goes to 0, then put on the dispose list */ 580 if (refcount_sub_and_test(decrement, &nf->nf_ref)) { 581 list_add(&nf->nf_lru, dispose); 582 trace_nfsd_file_closing(nf); 583 } 584 } 585 586 /** 587 * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode 588 * @inode: inode on which to close out nfsd_files 589 * @dispose: list on which to gather nfsd_files to close out 590 * 591 * An nfsd_file represents a struct file being held open on behalf of nfsd. 592 * An open file however can block other activity (such as leases), or cause 593 * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). 594 * 595 * This function is intended to find open nfsd_files when this sort of 596 * conflicting access occurs and then attempt to close those files out. 597 * 598 * Populates the dispose list with entries that have already had their 599 * refcounts go to zero. The actual free of an nfsd_file can be expensive, 600 * so we leave it up to the caller whether it wants to wait or not. 601 */ 602 static void 603 nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) 604 { 605 struct rhlist_head *tmp, *list; 606 struct nfsd_file *nf; 607 608 rcu_read_lock(); 609 list = rhltable_lookup(&nfsd_file_rhltable, &inode, 610 nfsd_file_rhash_params); 611 rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { 612 if (!test_bit(NFSD_FILE_GC, &nf->nf_flags)) 613 continue; 614 nfsd_file_cond_queue(nf, dispose); 615 } 616 rcu_read_unlock(); 617 } 618 619 /** 620 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file 621 * @inode: inode of the file to attempt to remove 622 * 623 * Close out any open nfsd_files that can be reaped for @inode. The 624 * actual freeing is deferred to the dispose_list_delayed infrastructure. 625 * 626 * This is used by the fsnotify callbacks and setlease notifier. 627 */ 628 static void 629 nfsd_file_close_inode(struct inode *inode) 630 { 631 LIST_HEAD(dispose); 632 633 nfsd_file_queue_for_close(inode, &dispose); 634 nfsd_file_dispose_list_delayed(&dispose); 635 } 636 637 /** 638 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file 639 * @inode: inode of the file to attempt to remove 640 * 641 * Close out any open nfsd_files that can be reaped for @inode. The 642 * nfsd_files are closed out synchronously. 643 * 644 * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames 645 * when reexporting NFS. 646 */ 647 void 648 nfsd_file_close_inode_sync(struct inode *inode) 649 { 650 struct nfsd_file *nf; 651 LIST_HEAD(dispose); 652 653 trace_nfsd_file_close(inode); 654 655 nfsd_file_queue_for_close(inode, &dispose); 656 while (!list_empty(&dispose)) { 657 nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); 658 list_del_init(&nf->nf_lru); 659 nfsd_file_free(nf); 660 } 661 } 662 663 static int 664 nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, 665 void *data) 666 { 667 struct file_lease *fl = data; 668 669 /* Only close files for F_SETLEASE leases */ 670 if (fl->c.flc_flags & FL_LEASE) 671 nfsd_file_close_inode(file_inode(fl->c.flc_file)); 672 return 0; 673 } 674 675 static struct notifier_block nfsd_file_lease_notifier = { 676 .notifier_call = nfsd_file_lease_notifier_call, 677 }; 678 679 static int 680 nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, 681 struct inode *inode, struct inode *dir, 682 const struct qstr *name, u32 cookie) 683 { 684 if (WARN_ON_ONCE(!inode)) 685 return 0; 686 687 trace_nfsd_file_fsnotify_handle_event(inode, mask); 688 689 /* Should be no marks on non-regular files */ 690 if (!S_ISREG(inode->i_mode)) { 691 WARN_ON_ONCE(1); 692 return 0; 693 } 694 695 /* don't close files if this was not the last link */ 696 if (mask & FS_ATTRIB) { 697 if (inode->i_nlink) 698 return 0; 699 } 700 701 nfsd_file_close_inode(inode); 702 return 0; 703 } 704 705 706 static const struct fsnotify_ops nfsd_file_fsnotify_ops = { 707 .handle_inode_event = nfsd_file_fsnotify_handle_event, 708 .free_mark = nfsd_file_mark_free, 709 }; 710 711 int 712 nfsd_file_cache_init(void) 713 { 714 int ret; 715 716 lockdep_assert_held(&nfsd_mutex); 717 if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) 718 return 0; 719 720 ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params); 721 if (ret) 722 goto out; 723 724 ret = -ENOMEM; 725 nfsd_file_slab = KMEM_CACHE(nfsd_file, 0); 726 if (!nfsd_file_slab) { 727 pr_err("nfsd: unable to create nfsd_file_slab\n"); 728 goto out_err; 729 } 730 731 nfsd_file_mark_slab = KMEM_CACHE(nfsd_file_mark, 0); 732 if (!nfsd_file_mark_slab) { 733 pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); 734 goto out_err; 735 } 736 737 ret = list_lru_init(&nfsd_file_lru); 738 if (ret) { 739 pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); 740 goto out_err; 741 } 742 743 nfsd_file_shrinker = shrinker_alloc(0, "nfsd-filecache"); 744 if (!nfsd_file_shrinker) { 745 ret = -ENOMEM; 746 pr_err("nfsd: failed to allocate nfsd_file_shrinker\n"); 747 goto out_lru; 748 } 749 750 nfsd_file_shrinker->count_objects = nfsd_file_lru_count; 751 nfsd_file_shrinker->scan_objects = nfsd_file_lru_scan; 752 nfsd_file_shrinker->seeks = 1; 753 754 shrinker_register(nfsd_file_shrinker); 755 756 ret = lease_register_notifier(&nfsd_file_lease_notifier); 757 if (ret) { 758 pr_err("nfsd: unable to register lease notifier: %d\n", ret); 759 goto out_shrinker; 760 } 761 762 nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, 763 FSNOTIFY_GROUP_NOFS); 764 if (IS_ERR(nfsd_file_fsnotify_group)) { 765 pr_err("nfsd: unable to create fsnotify group: %ld\n", 766 PTR_ERR(nfsd_file_fsnotify_group)); 767 ret = PTR_ERR(nfsd_file_fsnotify_group); 768 nfsd_file_fsnotify_group = NULL; 769 goto out_notifier; 770 } 771 772 INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); 773 out: 774 if (ret) 775 clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags); 776 return ret; 777 out_notifier: 778 lease_unregister_notifier(&nfsd_file_lease_notifier); 779 out_shrinker: 780 shrinker_free(nfsd_file_shrinker); 781 out_lru: 782 list_lru_destroy(&nfsd_file_lru); 783 out_err: 784 kmem_cache_destroy(nfsd_file_slab); 785 nfsd_file_slab = NULL; 786 kmem_cache_destroy(nfsd_file_mark_slab); 787 nfsd_file_mark_slab = NULL; 788 rhltable_destroy(&nfsd_file_rhltable); 789 goto out; 790 } 791 792 /** 793 * __nfsd_file_cache_purge: clean out the cache for shutdown 794 * @net: net-namespace to shut down the cache (may be NULL) 795 * 796 * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, 797 * then close out everything. Called when an nfsd instance is being shut down, 798 * and when the exports table is flushed. 799 */ 800 static void 801 __nfsd_file_cache_purge(struct net *net) 802 { 803 struct rhashtable_iter iter; 804 struct nfsd_file *nf; 805 LIST_HEAD(dispose); 806 807 rhltable_walk_enter(&nfsd_file_rhltable, &iter); 808 do { 809 rhashtable_walk_start(&iter); 810 811 nf = rhashtable_walk_next(&iter); 812 while (!IS_ERR_OR_NULL(nf)) { 813 if (!net || nf->nf_net == net) 814 nfsd_file_cond_queue(nf, &dispose); 815 nf = rhashtable_walk_next(&iter); 816 } 817 818 rhashtable_walk_stop(&iter); 819 } while (nf == ERR_PTR(-EAGAIN)); 820 rhashtable_walk_exit(&iter); 821 822 nfsd_file_dispose_list(&dispose); 823 } 824 825 static struct nfsd_fcache_disposal * 826 nfsd_alloc_fcache_disposal(void) 827 { 828 struct nfsd_fcache_disposal *l; 829 830 l = kmalloc(sizeof(*l), GFP_KERNEL); 831 if (!l) 832 return NULL; 833 spin_lock_init(&l->lock); 834 INIT_LIST_HEAD(&l->freeme); 835 return l; 836 } 837 838 static void 839 nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) 840 { 841 nfsd_file_dispose_list(&l->freeme); 842 kfree(l); 843 } 844 845 static void 846 nfsd_free_fcache_disposal_net(struct net *net) 847 { 848 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 849 struct nfsd_fcache_disposal *l = nn->fcache_disposal; 850 851 nfsd_free_fcache_disposal(l); 852 } 853 854 int 855 nfsd_file_cache_start_net(struct net *net) 856 { 857 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 858 859 nn->fcache_disposal = nfsd_alloc_fcache_disposal(); 860 return nn->fcache_disposal ? 0 : -ENOMEM; 861 } 862 863 /** 864 * nfsd_file_cache_purge - Remove all cache items associated with @net 865 * @net: target net namespace 866 * 867 */ 868 void 869 nfsd_file_cache_purge(struct net *net) 870 { 871 lockdep_assert_held(&nfsd_mutex); 872 if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) 873 __nfsd_file_cache_purge(net); 874 } 875 876 void 877 nfsd_file_cache_shutdown_net(struct net *net) 878 { 879 nfsd_file_cache_purge(net); 880 nfsd_free_fcache_disposal_net(net); 881 } 882 883 void 884 nfsd_file_cache_shutdown(void) 885 { 886 int i; 887 888 lockdep_assert_held(&nfsd_mutex); 889 if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) 890 return; 891 892 lease_unregister_notifier(&nfsd_file_lease_notifier); 893 shrinker_free(nfsd_file_shrinker); 894 /* 895 * make sure all callers of nfsd_file_lru_cb are done before 896 * calling nfsd_file_cache_purge 897 */ 898 cancel_delayed_work_sync(&nfsd_filecache_laundrette); 899 __nfsd_file_cache_purge(NULL); 900 list_lru_destroy(&nfsd_file_lru); 901 rcu_barrier(); 902 fsnotify_put_group(nfsd_file_fsnotify_group); 903 nfsd_file_fsnotify_group = NULL; 904 kmem_cache_destroy(nfsd_file_slab); 905 nfsd_file_slab = NULL; 906 fsnotify_wait_marks_destroyed(); 907 kmem_cache_destroy(nfsd_file_mark_slab); 908 nfsd_file_mark_slab = NULL; 909 rhltable_destroy(&nfsd_file_rhltable); 910 911 for_each_possible_cpu(i) { 912 per_cpu(nfsd_file_cache_hits, i) = 0; 913 per_cpu(nfsd_file_acquisitions, i) = 0; 914 per_cpu(nfsd_file_releases, i) = 0; 915 per_cpu(nfsd_file_total_age, i) = 0; 916 per_cpu(nfsd_file_evictions, i) = 0; 917 } 918 } 919 920 static struct nfsd_file * 921 nfsd_file_lookup_locked(const struct net *net, const struct cred *cred, 922 struct inode *inode, unsigned char need, 923 bool want_gc) 924 { 925 struct rhlist_head *tmp, *list; 926 struct nfsd_file *nf; 927 928 list = rhltable_lookup(&nfsd_file_rhltable, &inode, 929 nfsd_file_rhash_params); 930 rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { 931 if (nf->nf_may != need) 932 continue; 933 if (nf->nf_net != net) 934 continue; 935 if (!nfsd_match_cred(nf->nf_cred, cred)) 936 continue; 937 if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc) 938 continue; 939 if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) 940 continue; 941 942 if (!nfsd_file_get(nf)) 943 continue; 944 return nf; 945 } 946 return NULL; 947 } 948 949 /** 950 * nfsd_file_is_cached - are there any cached open files for this inode? 951 * @inode: inode to check 952 * 953 * The lookup matches inodes in all net namespaces and is atomic wrt 954 * nfsd_file_acquire(). 955 * 956 * Return values: 957 * %true: filecache contains at least one file matching this inode 958 * %false: filecache contains no files matching this inode 959 */ 960 bool 961 nfsd_file_is_cached(struct inode *inode) 962 { 963 struct rhlist_head *tmp, *list; 964 struct nfsd_file *nf; 965 bool ret = false; 966 967 rcu_read_lock(); 968 list = rhltable_lookup(&nfsd_file_rhltable, &inode, 969 nfsd_file_rhash_params); 970 rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) 971 if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) { 972 ret = true; 973 break; 974 } 975 rcu_read_unlock(); 976 977 trace_nfsd_file_is_cached(inode, (int)ret); 978 return ret; 979 } 980 981 static __be32 982 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 983 unsigned int may_flags, struct file *file, 984 struct nfsd_file **pnf, bool want_gc) 985 { 986 unsigned char need = may_flags & NFSD_FILE_MAY_MASK; 987 struct net *net = SVC_NET(rqstp); 988 struct nfsd_file *new, *nf; 989 bool stale_retry = true; 990 bool open_retry = true; 991 struct inode *inode; 992 __be32 status; 993 int ret; 994 995 retry: 996 status = fh_verify(rqstp, fhp, S_IFREG, 997 may_flags|NFSD_MAY_OWNER_OVERRIDE); 998 if (status != nfs_ok) 999 return status; 1000 inode = d_inode(fhp->fh_dentry); 1001 1002 rcu_read_lock(); 1003 nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc); 1004 rcu_read_unlock(); 1005 1006 if (nf) { 1007 /* 1008 * If the nf is on the LRU then it holds an extra reference 1009 * that must be put if it's removed. It had better not be 1010 * the last one however, since we should hold another. 1011 */ 1012 if (nfsd_file_lru_remove(nf)) 1013 WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); 1014 goto wait_for_construction; 1015 } 1016 1017 new = nfsd_file_alloc(net, inode, need, want_gc); 1018 if (!new) { 1019 status = nfserr_jukebox; 1020 goto out; 1021 } 1022 1023 rcu_read_lock(); 1024 spin_lock(&inode->i_lock); 1025 nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc); 1026 if (unlikely(nf)) { 1027 spin_unlock(&inode->i_lock); 1028 rcu_read_unlock(); 1029 nfsd_file_slab_free(&new->nf_rcu); 1030 goto wait_for_construction; 1031 } 1032 nf = new; 1033 ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist, 1034 nfsd_file_rhash_params); 1035 spin_unlock(&inode->i_lock); 1036 rcu_read_unlock(); 1037 if (likely(ret == 0)) 1038 goto open_file; 1039 1040 trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret); 1041 status = nfserr_jukebox; 1042 goto construction_err; 1043 1044 wait_for_construction: 1045 wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); 1046 1047 /* Did construction of this file fail? */ 1048 if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 1049 trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf); 1050 if (!open_retry) { 1051 status = nfserr_jukebox; 1052 goto construction_err; 1053 } 1054 nfsd_file_put(nf); 1055 open_retry = false; 1056 fh_put(fhp); 1057 goto retry; 1058 } 1059 this_cpu_inc(nfsd_file_cache_hits); 1060 1061 status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); 1062 if (status != nfs_ok) { 1063 nfsd_file_put(nf); 1064 nf = NULL; 1065 } 1066 1067 out: 1068 if (status == nfs_ok) { 1069 this_cpu_inc(nfsd_file_acquisitions); 1070 nfsd_file_check_write_error(nf); 1071 *pnf = nf; 1072 } 1073 trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); 1074 return status; 1075 1076 open_file: 1077 trace_nfsd_file_alloc(nf); 1078 nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode); 1079 if (nf->nf_mark) { 1080 if (file) { 1081 get_file(file); 1082 nf->nf_file = file; 1083 status = nfs_ok; 1084 trace_nfsd_file_opened(nf, status); 1085 } else { 1086 ret = nfsd_open_verified(rqstp, fhp, may_flags, 1087 &nf->nf_file); 1088 if (ret == -EOPENSTALE && stale_retry) { 1089 stale_retry = false; 1090 nfsd_file_unhash(nf); 1091 clear_and_wake_up_bit(NFSD_FILE_PENDING, 1092 &nf->nf_flags); 1093 if (refcount_dec_and_test(&nf->nf_ref)) 1094 nfsd_file_free(nf); 1095 nf = NULL; 1096 fh_put(fhp); 1097 goto retry; 1098 } 1099 status = nfserrno(ret); 1100 trace_nfsd_file_open(nf, status); 1101 } 1102 } else 1103 status = nfserr_jukebox; 1104 /* 1105 * If construction failed, or we raced with a call to unlink() 1106 * then unhash. 1107 */ 1108 if (status != nfs_ok || inode->i_nlink == 0) 1109 nfsd_file_unhash(nf); 1110 clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); 1111 if (status == nfs_ok) 1112 goto out; 1113 1114 construction_err: 1115 if (refcount_dec_and_test(&nf->nf_ref)) 1116 nfsd_file_free(nf); 1117 nf = NULL; 1118 goto out; 1119 } 1120 1121 /** 1122 * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file 1123 * @rqstp: the RPC transaction being executed 1124 * @fhp: the NFS filehandle of the file to be opened 1125 * @may_flags: NFSD_MAY_ settings for the file 1126 * @pnf: OUT: new or found "struct nfsd_file" object 1127 * 1128 * The nfsd_file object returned by this API is reference-counted 1129 * and garbage-collected. The object is retained for a few 1130 * seconds after the final nfsd_file_put() in case the caller 1131 * wants to re-use it. 1132 * 1133 * Return values: 1134 * %nfs_ok - @pnf points to an nfsd_file with its reference 1135 * count boosted. 1136 * 1137 * On error, an nfsstat value in network byte order is returned. 1138 */ 1139 __be32 1140 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, 1141 unsigned int may_flags, struct nfsd_file **pnf) 1142 { 1143 return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); 1144 } 1145 1146 /** 1147 * nfsd_file_acquire - Get a struct nfsd_file with an open file 1148 * @rqstp: the RPC transaction being executed 1149 * @fhp: the NFS filehandle of the file to be opened 1150 * @may_flags: NFSD_MAY_ settings for the file 1151 * @pnf: OUT: new or found "struct nfsd_file" object 1152 * 1153 * The nfsd_file_object returned by this API is reference-counted 1154 * but not garbage-collected. The object is unhashed after the 1155 * final nfsd_file_put(). 1156 * 1157 * Return values: 1158 * %nfs_ok - @pnf points to an nfsd_file with its reference 1159 * count boosted. 1160 * 1161 * On error, an nfsstat value in network byte order is returned. 1162 */ 1163 __be32 1164 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 1165 unsigned int may_flags, struct nfsd_file **pnf) 1166 { 1167 return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); 1168 } 1169 1170 /** 1171 * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file 1172 * @rqstp: the RPC transaction being executed 1173 * @fhp: the NFS filehandle of the file just created 1174 * @may_flags: NFSD_MAY_ settings for the file 1175 * @file: cached, already-open file (may be NULL) 1176 * @pnf: OUT: new or found "struct nfsd_file" object 1177 * 1178 * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, 1179 * and @file is non-NULL, use it to instantiate a new nfsd_file instead of 1180 * opening a new one. 1181 * 1182 * Return values: 1183 * %nfs_ok - @pnf points to an nfsd_file with its reference 1184 * count boosted. 1185 * 1186 * On error, an nfsstat value in network byte order is returned. 1187 */ 1188 __be32 1189 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, 1190 unsigned int may_flags, struct file *file, 1191 struct nfsd_file **pnf) 1192 { 1193 return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); 1194 } 1195 1196 /* 1197 * Note that fields may be added, removed or reordered in the future. Programs 1198 * scraping this file for info should test the labels to ensure they're 1199 * getting the correct field. 1200 */ 1201 int nfsd_file_cache_stats_show(struct seq_file *m, void *v) 1202 { 1203 unsigned long releases = 0, evictions = 0; 1204 unsigned long hits = 0, acquisitions = 0; 1205 unsigned int i, count = 0, buckets = 0; 1206 unsigned long lru = 0, total_age = 0; 1207 1208 /* Serialize with server shutdown */ 1209 mutex_lock(&nfsd_mutex); 1210 if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) { 1211 struct bucket_table *tbl; 1212 struct rhashtable *ht; 1213 1214 lru = list_lru_count(&nfsd_file_lru); 1215 1216 rcu_read_lock(); 1217 ht = &nfsd_file_rhltable.ht; 1218 count = atomic_read(&ht->nelems); 1219 tbl = rht_dereference_rcu(ht->tbl, ht); 1220 buckets = tbl->size; 1221 rcu_read_unlock(); 1222 } 1223 mutex_unlock(&nfsd_mutex); 1224 1225 for_each_possible_cpu(i) { 1226 hits += per_cpu(nfsd_file_cache_hits, i); 1227 acquisitions += per_cpu(nfsd_file_acquisitions, i); 1228 releases += per_cpu(nfsd_file_releases, i); 1229 total_age += per_cpu(nfsd_file_total_age, i); 1230 evictions += per_cpu(nfsd_file_evictions, i); 1231 } 1232 1233 seq_printf(m, "total inodes: %u\n", count); 1234 seq_printf(m, "hash buckets: %u\n", buckets); 1235 seq_printf(m, "lru entries: %lu\n", lru); 1236 seq_printf(m, "cache hits: %lu\n", hits); 1237 seq_printf(m, "acquisitions: %lu\n", acquisitions); 1238 seq_printf(m, "releases: %lu\n", releases); 1239 seq_printf(m, "evictions: %lu\n", evictions); 1240 if (releases) 1241 seq_printf(m, "mean age (ms): %ld\n", total_age / releases); 1242 else 1243 seq_printf(m, "mean age (ms): -\n"); 1244 return 0; 1245 } 1246
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.