1 // SPDX-License-Identifier: GPL-2.0 1 2 3 #include "bcachefs.h" 4 #include "alloc_background.h" 5 #include "bkey_buf.h" 6 #include "btree_journal_iter.h" 7 #include "btree_node_scan.h" 8 #include "btree_update.h" 9 #include "btree_update_interior.h" 10 #include "btree_io.h" 11 #include "buckets.h" 12 #include "dirent.h" 13 #include "disk_accounting.h" 14 #include "errcode.h" 15 #include "error.h" 16 #include "fs-common.h" 17 #include "journal_io.h" 18 #include "journal_reclaim.h" 19 #include "journal_seq_blacklist.h" 20 #include "logged_ops.h" 21 #include "move.h" 22 #include "quota.h" 23 #include "rebalance.h" 24 #include "recovery.h" 25 #include "recovery_passes.h" 26 #include "replicas.h" 27 #include "sb-clean.h" 28 #include "sb-downgrade.h" 29 #include "snapshot.h" 30 #include "super-io.h" 31 32 #include <linux/sort.h> 33 #include <linux/stat.h> 34 35 #define QSTR(n) { { { .len = strlen(n) } }, .n 36 37 void bch2_btree_lost_data(struct bch_fs *c, en 38 { 39 if (btree >= BTREE_ID_NR_MAX) 40 return; 41 42 u64 b = BIT_ULL(btree); 43 44 if (!(c->sb.btrees_lost_data & b)) { 45 bch_err(c, "flagging btree %s 46 47 mutex_lock(&c->sb_lock); 48 bch2_sb_field_get(c->disk_sb.s 49 bch2_write_super(c); 50 mutex_unlock(&c->sb_lock); 51 } 52 } 53 54 /* for -o reconstruct_alloc: */ 55 static void bch2_reconstruct_alloc(struct bch_ 56 { 57 bch2_journal_log_msg(c, "dropping allo 58 bch_info(c, "dropping and reconstructi 59 60 mutex_lock(&c->sb_lock); 61 struct bch_sb_field_ext *ext = bch2_sb 62 63 __set_bit_le64(BCH_RECOVERY_PASS_STABL 64 __set_bit_le64(BCH_RECOVERY_PASS_STABL 65 __set_bit_le64(BCH_RECOVERY_PASS_STABL 66 __set_bit_le64(BCH_RECOVERY_PASS_STABL 67 __set_bit_le64(BCH_RECOVERY_PASS_STABL 68 69 __set_bit_le64(BCH_FSCK_ERR_ptr_to_mis 70 __set_bit_le64(BCH_FSCK_ERR_ptr_gen_ne 71 __set_bit_le64(BCH_FSCK_ERR_stale_dirt 72 73 __set_bit_le64(BCH_FSCK_ERR_dev_usage_ 74 __set_bit_le64(BCH_FSCK_ERR_dev_usage_ 75 __set_bit_le64(BCH_FSCK_ERR_dev_usage_ 76 77 __set_bit_le64(BCH_FSCK_ERR_fs_usage_b 78 __set_bit_le64(BCH_FSCK_ERR_fs_usage_c 79 __set_bit_le64(BCH_FSCK_ERR_fs_usage_p 80 __set_bit_le64(BCH_FSCK_ERR_fs_usage_r 81 82 __set_bit_le64(BCH_FSCK_ERR_alloc_key_ 83 __set_bit_le64(BCH_FSCK_ERR_alloc_key_ 84 __set_bit_le64(BCH_FSCK_ERR_alloc_key_ 85 __set_bit_le64(BCH_FSCK_ERR_alloc_key_ 86 __set_bit_le64(BCH_FSCK_ERR_alloc_key_ 87 __set_bit_le64(BCH_FSCK_ERR_alloc_key_ 88 __set_bit_le64(BCH_FSCK_ERR_need_disca 89 __set_bit_le64(BCH_FSCK_ERR_freespace_ 90 __set_bit_le64(BCH_FSCK_ERR_bucket_gen 91 __set_bit_le64(BCH_FSCK_ERR_freespace_ 92 __set_bit_le64(BCH_FSCK_ERR_ptr_to_mis 93 __set_bit_le64(BCH_FSCK_ERR_lru_entry_ 94 __set_bit_le64(BCH_FSCK_ERR_accounting 95 c->sb.compat &= ~(1ULL << BCH_COMPAT_a 96 97 c->opts.recovery_passes |= bch2_recove 98 99 bch2_write_super(c); 100 mutex_unlock(&c->sb_lock); 101 102 bch2_shoot_down_journal_keys(c, BTREE_ 103 0, BTREE_ 104 bch2_shoot_down_journal_keys(c, BTREE_ 105 0, BTREE_ 106 bch2_shoot_down_journal_keys(c, BTREE_ 107 0, BTREE_ 108 bch2_shoot_down_journal_keys(c, BTREE_ 109 0, BTREE_ 110 bch2_shoot_down_journal_keys(c, BTREE_ 111 0, BTREE_ 112 } 113 114 /* 115 * Btree node pointers have a field to stack a 116 * node; we need to zero out this field when r 117 * reading in keys from the journal: 118 */ 119 static void zero_out_btree_mem_ptr(struct jour 120 { 121 darray_for_each(*keys, i) 122 if (i->k->k.type == KEY_TYPE_b 123 bkey_i_to_btree_ptr_v2 124 } 125 126 /* journal replay: */ 127 128 static void replay_now_at(struct journal *j, u 129 { 130 BUG_ON(seq < j->replay_journal_seq); 131 132 seq = min(seq, j->replay_journal_seq_e 133 134 while (j->replay_journal_seq < seq) 135 bch2_journal_pin_put(j, j->rep 136 } 137 138 static int bch2_journal_replay_accounting_key( 139 140 { 141 struct btree_iter iter; 142 bch2_trans_node_iter_init(trans, &iter 143 BTREE_MAX_DE 144 BTREE_ITER_i 145 int ret = bch2_btree_iter_traverse(&it 146 if (ret) 147 goto out; 148 149 struct bkey u; 150 struct bkey_s_c old = bch2_btree_path_ 151 152 /* Has this delta already been applied 153 if (bversion_cmp(old.k->bversion, k->k 154 ret = 0; 155 goto out; 156 } 157 158 struct bkey_i *new = k->k; 159 if (old.k->type == KEY_TYPE_accounting 160 new = bch2_bkey_make_mut_noupd 161 ret = PTR_ERR_OR_ZERO(new); 162 if (ret) 163 goto out; 164 165 bch2_accounting_accumulate(bke 166 bke 167 } 168 169 trans->journal_res.seq = k->journal_se 170 171 ret = bch2_trans_update(trans, &iter, 172 out: 173 bch2_trans_iter_exit(trans, &iter); 174 return ret; 175 } 176 177 static int bch2_journal_replay_key(struct btre 178 struct jour 179 { 180 struct btree_iter iter; 181 unsigned iter_flags = 182 BTREE_ITER_intent| 183 BTREE_ITER_not_extents; 184 unsigned update_flags = BTREE_TRIGGER_ 185 int ret; 186 187 if (k->overwritten) 188 return 0; 189 190 trans->journal_res.seq = k->journal_se 191 192 /* 193 * BTREE_UPDATE_key_cache_reclaim disa 194 * keep the key cache coherent with th 195 * besides the allocator is doing upda 196 * coherency for non-alloc btrees, and 197 * btrees use BTREE_ITER_filter_snapsh 198 * the snapshots recovery pass runs. 199 */ 200 if (!k->level && k->btree_id == BTREE_ 201 iter_flags |= BTREE_ITER_cache 202 else 203 update_flags |= BTREE_UPDATE_k 204 205 bch2_trans_node_iter_init(trans, &iter 206 BTREE_MAX_DE 207 iter_flags); 208 ret = bch2_btree_iter_traverse(&iter); 209 if (ret) 210 goto out; 211 212 struct btree_path *path = btree_iter_p 213 if (unlikely(!btree_path_node(path, k- 214 bch2_trans_iter_exit(trans, &i 215 bch2_trans_node_iter_init(tran 216 BTRE 217 ret = bch2_btree_iter_traver 218 bch2_btree_increase_de 219 -BCH_ERR_transaction_r 220 goto out; 221 } 222 223 /* Must be checked with btree locked: 224 if (k->overwritten) 225 goto out; 226 227 if (k->k->k.type == KEY_TYPE_accountin 228 ret = bch2_trans_update_buffer 229 goto out; 230 } 231 232 ret = bch2_trans_update(trans, &iter, 233 out: 234 bch2_trans_iter_exit(trans, &iter); 235 return ret; 236 } 237 238 static int journal_sort_seq_cmp(const void *_l 239 { 240 const struct journal_key *l = *((const 241 const struct journal_key *r = *((const 242 243 /* 244 * Map 0 to U64_MAX, so that keys with 245 * 246 * journal_seq == 0 means that the key 247 * should be inserted last so as to av 248 */ 249 return cmp_int(l->journal_seq - 1, r-> 250 } 251 252 int bch2_journal_replay(struct bch_fs *c) 253 { 254 struct journal_keys *keys = &c->journa 255 DARRAY(struct journal_key *) keys_sort 256 struct journal *j = &c->journal; 257 u64 start_seq = c->journal_replay_se 258 u64 end_seq = c->journal_replay_se 259 struct btree_trans *trans = NULL; 260 bool immediate_flush = false; 261 int ret = 0; 262 263 if (keys->nr) { 264 ret = bch2_journal_log_msg(c, 265 key 266 if (ret) 267 goto err; 268 } 269 270 BUG_ON(!atomic_read(&keys->ref)); 271 272 move_gap(keys, keys->nr); 273 trans = bch2_trans_get(c); 274 275 /* 276 * Replay accounting keys first: we ca 277 * flush accounting keys until we're d 278 */ 279 darray_for_each(*keys, k) { 280 if (!(k->k->k.type == KEY_TYPE 281 continue; 282 283 cond_resched(); 284 285 ret = commit_do(trans, NULL, N 286 BCH_TRANS_COMM 287 BCH_TRANS_COMM 288 BCH_TRANS_COMM 289 BCH_TRANS_COMM 290 BCH_WATERMARK_ 291 bch2_journal_repl 292 if (bch2_fs_fatal_err_on(ret, 293 goto err; 294 295 k->overwritten = true; 296 } 297 298 set_bit(BCH_FS_accounting_replay_done, 299 300 /* 301 * First, attempt to replay keys in so 302 * efficient - better locality of btre 303 * that would cause a journal deadlock 304 */ 305 darray_for_each(*keys, k) { 306 cond_resched(); 307 308 /* 309 * k->allocated means the key 310 * rather it was from early re 311 */ 312 if (k->allocated) 313 immediate_flush = true 314 315 /* Skip fastpath if we're low 316 ret = c->journal.watermark ? - 317 commit_do(trans, NULL, 318 BCH_TRANS_CO 319 BCH_TRANS_CO 320 BCH_TRANS_CO 321 (!k->allocat 322 bch2_journal_repl 323 BUG_ON(!ret && !k->overwritten 324 if (ret) { 325 ret = darray_push(&key 326 if (ret) 327 goto err; 328 } 329 } 330 331 bch2_trans_unlock_long(trans); 332 /* 333 * Now, replay any remaining keys in t 334 * the journal, unpinning those journa 335 */ 336 sort(keys_sorted.data, keys_sorted.nr, 337 sizeof(keys_sorted.data[0]), 338 journal_sort_seq_cmp, NULL); 339 340 darray_for_each(keys_sorted, kp) { 341 cond_resched(); 342 343 struct journal_key *k = *kp; 344 345 if (k->journal_seq) 346 replay_now_at(j, k->jo 347 else 348 replay_now_at(j, j->re 349 350 ret = commit_do(trans, NULL, N 351 BCH_TRANS_COMM 352 BCH_TRANS_COMM 353 (!k->allocated 354 ? BCH_TRANS_C 355 : 0), 356 bch2_journal_repl 357 bch_err_msg(c, ret, "while rep 358 bch2_btree_id_str( 359 if (ret) 360 goto err; 361 362 BUG_ON(k->btree_id != BTREE_ID 363 } 364 365 /* 366 * We need to put our btree_trans befo 367 * that will use a btree_trans interna 368 */ 369 bch2_trans_put(trans); 370 trans = NULL; 371 372 if (!c->opts.retain_recovery_info && 373 c->recovery_pass_done >= BCH_RECOV 374 bch2_journal_keys_put_initial( 375 376 replay_now_at(j, j->replay_journal_seq 377 j->replay_journal_seq = 0; 378 379 bch2_journal_set_replay_done(j); 380 381 /* if we did any repair, flush it imme 382 if (immediate_flush) { 383 bch2_journal_flush_all_pins(&c 384 ret = bch2_journal_meta(&c->jo 385 } 386 387 if (keys->nr) 388 bch2_journal_log_msg(c, "journ 389 err: 390 if (trans) 391 bch2_trans_put(trans); 392 darray_exit(&keys_sorted); 393 bch_err_fn(c, ret); 394 return ret; 395 } 396 397 /* journal replay early: */ 398 399 static int journal_replay_entry_early(struct b 400 struct j 401 { 402 int ret = 0; 403 404 switch (entry->type) { 405 case BCH_JSET_ENTRY_btree_root: { 406 struct btree_root *r; 407 408 if (fsck_err_on(entry->btree_i 409 c, invalid_btr 410 "invalid btree 411 entry->btree_i 412 return 0; 413 414 while (entry->btree_id >= c->b 415 ret = darray_push(&c-> 416 if (ret) 417 return ret; 418 } 419 420 r = bch2_btree_id_root(c, entr 421 422 if (entry->u64s) { 423 r->level = entry->leve 424 bkey_copy(&r->key, (st 425 r->error = 0; 426 } else { 427 r->error = -BCH_ERR_bt 428 } 429 r->alive = true; 430 break; 431 } 432 case BCH_JSET_ENTRY_usage: { 433 struct jset_entry_usage *u = 434 container_of(entry, st 435 436 switch (entry->btree_id) { 437 case BCH_FS_USAGE_key_version: 438 atomic64_set(&c->key_v 439 break; 440 } 441 break; 442 } 443 case BCH_JSET_ENTRY_blacklist: { 444 struct jset_entry_blacklist *b 445 container_of(entry, st 446 447 ret = bch2_journal_seq_blackli 448 le64_to_cpu(bl 449 le64_to_cpu(bl 450 break; 451 } 452 case BCH_JSET_ENTRY_blacklist_v2: { 453 struct jset_entry_blacklist_v2 454 container_of(entry, st 455 456 ret = bch2_journal_seq_blackli 457 le64_to_cpu(bl 458 le64_to_cpu(bl 459 break; 460 } 461 case BCH_JSET_ENTRY_clock: { 462 struct jset_entry_clock *clock 463 container_of(entry, st 464 465 atomic64_set(&c->io_clock[cloc 466 } 467 } 468 fsck_err: 469 return ret; 470 } 471 472 static int journal_replay_early(struct bch_fs 473 struct bch_sb_ 474 { 475 if (clean) { 476 for (struct jset_entry *entry 477 entry != vstruct_end(&cle 478 entry = vstruct_next(entr 479 int ret = journal_repl 480 if (ret) 481 return ret; 482 } 483 } else { 484 struct genradix_iter iter; 485 struct journal_replay *i, **_i 486 487 genradix_for_each(&c->journal_ 488 i = *_i; 489 490 if (journal_replay_ign 491 continue; 492 493 vstruct_for_each(&i->j 494 int ret = jour 495 if (ret) 496 return 497 } 498 } 499 } 500 501 return 0; 502 } 503 504 /* sb clean section: */ 505 506 static int read_btree_roots(struct bch_fs *c) 507 { 508 int ret = 0; 509 510 for (unsigned i = 0; i < btree_id_nr_a 511 struct btree_root *r = bch2_bt 512 513 if (!r->alive) 514 continue; 515 516 if (btree_id_is_alloc(i) && c- 517 continue; 518 519 if (mustfix_fsck_err_on((ret = 520 c, btr 521 "inval 522 bch2_b 523 mustfix_fsck_err_on((ret = 524 c, btr 525 "error 526 bch2_b 527 if (btree_id_is_alloc( 528 c->opts.recove 529 c->opts.recove 530 c->opts.recove 531 c->opts.recove 532 c->opts.recove 533 c->sb.compat & 534 r->error = 0; 535 } else if (!(c->opts.r 536 bch_info(c, "w 537 c->opts.recove 538 c->opts.recove 539 } 540 541 ret = 0; 542 bch2_btree_lost_data(c 543 } 544 } 545 546 for (unsigned i = 0; i < BTREE_ID_NR; 547 struct btree_root *r = bch2_bt 548 549 if (!r->b && !r->error) { 550 r->alive = false; 551 r->level = 0; 552 bch2_btree_root_alloc_ 553 } 554 } 555 fsck_err: 556 return ret; 557 } 558 559 static bool check_version_upgrade(struct bch_f 560 { 561 unsigned latest_version = bcachefs_met 562 unsigned latest_compatible = min(lates 563 bch2_ 564 unsigned old_version = c->sb.version_u 565 unsigned new_version = 0; 566 567 if (old_version < bcachefs_metadata_re 568 if (c->opts.version_upgrade == 569 latest_compatible < bcache 570 new_version = latest_v 571 else 572 new_version = latest_c 573 } else { 574 switch (c->opts.version_upgrad 575 case BCH_VERSION_UPGRADE_compa 576 new_version = latest_c 577 break; 578 case BCH_VERSION_UPGRADE_incom 579 new_version = latest_v 580 break; 581 case BCH_VERSION_UPGRADE_none: 582 new_version = min(old_ 583 break; 584 } 585 } 586 587 if (new_version > old_version) { 588 struct printbuf buf = PRINTBUF 589 590 if (old_version < bcachefs_met 591 prt_str(&buf, "Version 592 593 if (old_version != c->sb.versi 594 prt_str(&buf, "Version 595 bch2_version_to_text(& 596 prt_str(&buf, " to "); 597 bch2_version_to_text(& 598 prt_str(&buf, " incomp 599 } 600 601 prt_printf(&buf, "Doing %s ver 602 BCH_VERSION_MAJOR(o 603 ? "incompatible" : 604 bch2_version_to_text(&buf, old 605 prt_str(&buf, " to "); 606 bch2_version_to_text(&buf, new 607 prt_newline(&buf); 608 609 struct bch_sb_field_ext *ext = 610 __le64 passes = ext->recovery_ 611 bch2_sb_set_upgrade(c, old_ver 612 passes = ext->recovery_passes_ 613 614 if (passes) { 615 prt_str(&buf, " runni 616 prt_bitflags(&buf, bch 617 bch2_reco 618 } 619 620 bch_info(c, "%s", buf.buf); 621 622 bch2_sb_upgrade(c, new_version 623 624 printbuf_exit(&buf); 625 return true; 626 } 627 628 return false; 629 } 630 631 int bch2_fs_recovery(struct bch_fs *c) 632 { 633 struct bch_sb_field_clean *clean = NUL 634 struct jset *last_journal_entry = NULL 635 u64 last_seq = 0, blacklist_seq, journ 636 int ret = 0; 637 638 if (c->sb.clean) { 639 clean = bch2_read_superblock_c 640 ret = PTR_ERR_OR_ZERO(clean); 641 if (ret) 642 goto err; 643 644 bch_info(c, "recovering from c 645 le64_to_cpu(clean->jo 646 } else { 647 bch_info(c, "recovering from u 648 } 649 650 if (!(c->sb.features & (1ULL << BCH_FE 651 bch_err(c, "feature new_extent 652 ret = -EINVAL; 653 goto err; 654 } 655 656 if (!c->sb.clean && 657 !(c->sb.features & (1ULL << BCH_FE 658 bch_err(c, "filesystem needs r 659 ret = -EINVAL; 660 goto err; 661 } 662 663 if (c->opts.norecovery) 664 c->opts.recovery_pass_last = B 665 666 mutex_lock(&c->sb_lock); 667 struct bch_sb_field_ext *ext = bch2_sb 668 bool write_sb = false; 669 670 if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk 671 ext->recovery_passes_required[ 672 cpu_to_le64(bch2_recov 673 write_sb = true; 674 } 675 676 u64 sb_passes = bch2_recovery_passes_f 677 if (sb_passes) { 678 struct printbuf buf = PRINTBUF 679 prt_str(&buf, "superblock requ 680 prt_bitflags(&buf, bch2_recove 681 bch_info(c, "%s", buf.buf); 682 printbuf_exit(&buf); 683 } 684 685 if (bch2_check_version_downgrade(c)) { 686 struct printbuf buf = PRINTBUF 687 688 prt_str(&buf, "Version downgra 689 690 __le64 passes = ext->recovery_ 691 bch2_sb_set_downgrade(c, 692 BCH_VERS 693 BCH_VERS 694 passes = ext->recovery_passes_ 695 if (passes) { 696 prt_str(&buf, "\n run 697 prt_bitflags(&buf, bch 698 bch2_reco 699 } 700 701 bch_info(c, "%s", buf.buf); 702 printbuf_exit(&buf); 703 write_sb = true; 704 } 705 706 if (check_version_upgrade(c)) 707 write_sb = true; 708 709 c->opts.recovery_passes |= bch2_recove 710 711 if (write_sb) 712 bch2_write_super(c); 713 mutex_unlock(&c->sb_lock); 714 715 if (c->opts.fsck && IS_ENABLED(CONFIG_ 716 c->opts.recovery_passes |= BIT 717 718 if (c->opts.fsck) 719 set_bit(BCH_FS_fsck_running, & 720 if (c->sb.clean) 721 set_bit(BCH_FS_clean_recovery, 722 723 ret = bch2_blacklist_table_initialize( 724 if (ret) { 725 bch_err(c, "error initializing 726 goto err; 727 } 728 729 bch2_journal_pos_from_member_info_resu 730 731 if (!c->sb.clean || c->opts.retain_rec 732 struct genradix_iter iter; 733 struct journal_replay **i; 734 735 bch_verbose(c, "starting journ 736 ret = bch2_journal_read(c, &la 737 if (ret) 738 goto err; 739 740 /* 741 * note: cmd_list_journal need 742 * it can asterisk ignored jou 743 */ 744 if (c->opts.read_journal_only) 745 goto out; 746 747 genradix_for_each_reverse(&c-> 748 if (!journal_replay_ig 749 last_journal_e 750 break; 751 } 752 753 if (mustfix_fsck_err_on(c->sb. 754 last_j 755 !journ 756 clean_but_jour 757 "filesystem ma 758 c->sb.compat &= ~(1ULL 759 SET_BCH_SB_CLEAN(c->di 760 c->sb.clean = false; 761 } 762 763 if (!last_journal_entry) { 764 fsck_err_on(!c->sb.cle 765 dirty_but_ 766 "no journa 767 if (clean) 768 goto use_clean 769 770 genradix_for_each_reve 771 if (*i) { 772 last_j 773 (*i)-> 774 (*i)-> 775 /* 776 * Thi 777 * so 778 * we' 779 * ent 780 */ 781 (*i)-> 782 break; 783 } 784 } 785 786 ret = bch2_journal_keys_sort(c 787 if (ret) 788 goto err; 789 790 if (c->sb.clean && last_journa 791 ret = bch2_verify_supe 792 793 if (ret) 794 goto err; 795 } 796 } else { 797 use_clean: 798 if (!clean) { 799 bch_err(c, "no superbl 800 ret = -BCH_ERR_fsck_re 801 goto err; 802 803 } 804 blacklist_seq = journal_seq = 805 } 806 807 c->journal_replay_seq_start = last 808 c->journal_replay_seq_end = blac 809 810 if (c->opts.reconstruct_alloc) 811 bch2_reconstruct_alloc(c); 812 813 zero_out_btree_mem_ptr(&c->journal_key 814 815 ret = journal_replay_early(c, clean); 816 if (ret) 817 goto err; 818 819 /* 820 * After an unclean shutdown, skip the 821 * numbers as they may have been refer 822 * happened before their corresponding 823 * writes need to be ignored, by skipp 824 * journal sequence numbers: 825 */ 826 if (!c->sb.clean) 827 journal_seq += 8; 828 829 if (blacklist_seq != journal_seq) { 830 ret = bch2_journal_log_msg(c 831 b 832 bch2_journal_seq_black 833 blackl 834 if (ret) { 835 bch_err_msg(c, ret, "e 836 goto err; 837 } 838 } 839 840 ret = bch2_journal_log_msg(c, "start 841 journal_s 842 bch2_fs_journal_start(&c->jour 843 if (ret) 844 goto err; 845 846 /* 847 * Skip past versions that might have 848 * but hadn't had their pointers writt 849 */ 850 if (c->sb.encryption_type && !c->sb.cl 851 atomic64_add(1 << 16, &c->key_ 852 853 ret = read_btree_roots(c); 854 if (ret) 855 goto err; 856 857 set_bit(BCH_FS_btree_running, &c->flag 858 859 ret = bch2_sb_set_upgrade_extra(c); 860 861 ret = bch2_run_recovery_passes(c); 862 if (ret) 863 goto err; 864 865 /* 866 * Normally set by the appropriate rec 867 * indicates we're in early recovery a 868 * being applied to the journal replay 869 * multithreaded use: 870 */ 871 set_bit(BCH_FS_may_go_rw, &c->flags); 872 clear_bit(BCH_FS_fsck_running, &c->fla 873 874 /* in case we don't run journal replay 875 set_bit(BCH_FS_accounting_replay_done, 876 877 /* fsync if we fixed errors */ 878 if (test_bit(BCH_FS_errors_fixed, &c-> 879 bch2_write_ref_tryget(c, BCH_WRITE 880 bch2_journal_flush_all_pins(&c 881 bch2_journal_meta(&c->journal) 882 bch2_write_ref_put(c, BCH_WRIT 883 } 884 885 /* If we fixed errors, verify that fs 886 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) 887 test_bit(BCH_FS_errors_fixed, &c-> 888 !test_bit(BCH_FS_errors_not_fixed, 889 !test_bit(BCH_FS_error, &c->flags) 890 bch2_flush_fsck_errs(c); 891 892 bch_info(c, "Fixed errors, run 893 clear_bit(BCH_FS_errors_fixed, 894 895 c->curr_recovery_pass = BCH_RE 896 897 ret = bch2_run_recovery_passes 898 if (ret) 899 goto err; 900 901 if (test_bit(BCH_FS_errors_fix 902 test_bit(BCH_FS_errors_not 903 bch_err(c, "Second fsc 904 set_bit(BCH_FS_errors_ 905 } 906 907 set_bit(BCH_FS_errors_fixed, & 908 } 909 910 if (enabled_qtypes(c)) { 911 bch_verbose(c, "reading quotas 912 ret = bch2_fs_quota_read(c); 913 if (ret) 914 goto err; 915 bch_verbose(c, "quotas done"); 916 } 917 918 mutex_lock(&c->sb_lock); 919 ext = bch2_sb_field_get(c->disk_sb.sb, 920 write_sb = false; 921 922 if (BCH_SB_VERSION_UPGRADE_COMPLETE(c- 923 SET_BCH_SB_VERSION_UPGRADE_COM 924 write_sb = true; 925 } 926 927 if (!test_bit(BCH_FS_error, &c->flags) 928 !(c->disk_sb.sb->compat[0] & cpu_t 929 c->disk_sb.sb->compat[0] |= cp 930 write_sb = true; 931 } 932 933 if (!test_bit(BCH_FS_error, &c->flags) 934 !bch2_is_zero(ext->errors_silent, 935 memset(ext->errors_silent, 0, 936 write_sb = true; 937 } 938 939 if (c->opts.fsck && 940 !test_bit(BCH_FS_error, &c->flags) 941 c->recovery_pass_done == BCH_RECOV 942 ext->btrees_lost_data) { 943 ext->btrees_lost_data = 0; 944 write_sb = true; 945 } 946 947 if (c->opts.fsck && 948 !test_bit(BCH_FS_error, &c->flags) 949 !test_bit(BCH_FS_errors_not_fixed, 950 SET_BCH_SB_HAS_ERRORS(c->disk_ 951 SET_BCH_SB_HAS_TOPOLOGY_ERRORS 952 write_sb = true; 953 } 954 955 if (bch2_blacklist_entries_gc(c)) 956 write_sb = true; 957 958 if (write_sb) 959 bch2_write_super(c); 960 mutex_unlock(&c->sb_lock); 961 962 if (!(c->sb.compat & (1ULL << BCH_COMP 963 c->sb.version_min < bcachefs_metad 964 struct bch_move_stats stats; 965 966 bch2_move_stats_init(&stats, " 967 968 struct printbuf buf = PRINTBUF 969 bch2_version_to_text(&buf, c-> 970 bch_info(c, "scanning for old 971 printbuf_exit(&buf); 972 973 ret = bch2_fs_read_write_ear 974 bch2_scan_old_btree_no 975 if (ret) 976 goto err; 977 bch_info(c, "scanning for old 978 } 979 980 ret = 0; 981 out: 982 bch2_flush_fsck_errs(c); 983 984 if (!c->opts.retain_recovery_info) { 985 bch2_journal_keys_put_initial( 986 bch2_find_btree_nodes_exit(&c- 987 } 988 if (!IS_ERR(clean)) 989 kfree(clean); 990 991 if (!ret && 992 test_bit(BCH_FS_need_delete_dead_s 993 !c->opts.nochanges) { 994 bch2_fs_read_write_early(c); 995 bch2_delete_dead_snapshots_asy 996 } 997 998 bch_err_fn(c, ret); 999 return ret; 1000 err: 1001 fsck_err: 1002 bch2_fs_emergency_read_only(c); 1003 goto out; 1004 } 1005 1006 int bch2_fs_initialize(struct bch_fs *c) 1007 { 1008 struct bch_inode_unpacked root_inode, 1009 struct bkey_inode_buf packed_inode; 1010 struct qstr lostfound = QSTR("lost+fo 1011 struct bch_member *m; 1012 int ret; 1013 1014 bch_notice(c, "initializing new files 1015 set_bit(BCH_FS_new_fs, &c->flags); 1016 1017 mutex_lock(&c->sb_lock); 1018 c->disk_sb.sb->compat[0] |= cpu_to_le 1019 c->disk_sb.sb->compat[0] |= cpu_to_le 1020 1021 bch2_check_version_downgrade(c); 1022 1023 if (c->opts.version_upgrade != BCH_VE 1024 bch2_sb_upgrade(c, bcachefs_m 1025 SET_BCH_SB_VERSION_UPGRADE_CO 1026 bch2_write_super(c); 1027 } 1028 1029 for_each_member_device(c, ca) { 1030 m = bch2_members_v2_get_mut(c 1031 SET_BCH_MEMBER_FREESPACE_INIT 1032 ca->mi = bch2_mi_to_cpu(m); 1033 } 1034 1035 bch2_write_super(c); 1036 mutex_unlock(&c->sb_lock); 1037 1038 c->curr_recovery_pass = BCH_RECOVERY_ 1039 set_bit(BCH_FS_btree_running, &c->fla 1040 set_bit(BCH_FS_may_go_rw, &c->flags); 1041 1042 for (unsigned i = 0; i < BTREE_ID_NR; 1043 bch2_btree_root_alloc_fake(c, 1044 1045 ret = bch2_fs_journal_alloc(c); 1046 if (ret) 1047 goto err; 1048 1049 /* 1050 * journal_res_get() will crash if ca 1051 * set up the journal.pin FIFO and jo 1052 */ 1053 bch2_fs_journal_start(&c->journal, 1) 1054 set_bit(BCH_FS_accounting_replay_done 1055 bch2_journal_set_replay_done(&c->jour 1056 1057 ret = bch2_fs_read_write_early(c); 1058 if (ret) 1059 goto err; 1060 1061 for_each_member_device(c, ca) { 1062 ret = bch2_dev_usage_init(ca, 1063 if (ret) { 1064 bch2_dev_put(ca); 1065 goto err; 1066 } 1067 } 1068 1069 /* 1070 * Write out the superblock and journ 1071 * btree updates 1072 */ 1073 bch_verbose(c, "marking superblocks") 1074 ret = bch2_trans_mark_dev_sbs(c); 1075 bch_err_msg(c, ret, "marking superblo 1076 if (ret) 1077 goto err; 1078 1079 for_each_online_member(c, ca) 1080 ca->new_fs_bucket_idx = 0; 1081 1082 ret = bch2_fs_freespace_init(c); 1083 if (ret) 1084 goto err; 1085 1086 ret = bch2_initialize_subvolumes(c); 1087 if (ret) 1088 goto err; 1089 1090 bch_verbose(c, "reading snapshots tab 1091 ret = bch2_snapshots_read(c); 1092 if (ret) 1093 goto err; 1094 bch_verbose(c, "reading snapshots don 1095 1096 bch2_inode_init(c, &root_inode, 0, 0, 1097 root_inode.bi_inum = BCACHEFS_RO 1098 root_inode.bi_subvol = BCACHEFS_RO 1099 bch2_inode_pack(&packed_inode, &root_ 1100 packed_inode.inode.k.p.snapshot = U32 1101 1102 ret = bch2_btree_insert(c, BTREE_ID_i 1103 bch_err_msg(c, ret, "creating root di 1104 if (ret) 1105 goto err; 1106 1107 bch2_inode_init_early(c, &lostfound_i 1108 1109 ret = bch2_trans_commit_do(c, NULL, N 1110 bch2_create_trans(trans, 1111 BCACHEFS_RO 1112 &root_inode 1113 &lostfound, 1114 0, 0, S_IFD 1115 NULL, NULL, 1116 bch_err_msg(c, ret, "creating lost+fo 1117 if (ret) 1118 goto err; 1119 1120 c->recovery_pass_done = BCH_RECOVERY_ 1121 1122 if (enabled_qtypes(c)) { 1123 ret = bch2_fs_quota_read(c); 1124 if (ret) 1125 goto err; 1126 } 1127 1128 ret = bch2_journal_flush(&c->journal) 1129 bch_err_msg(c, ret, "writing first jo 1130 if (ret) 1131 goto err; 1132 1133 mutex_lock(&c->sb_lock); 1134 SET_BCH_SB_INITIALIZED(c->disk_sb.sb, 1135 SET_BCH_SB_CLEAN(c->disk_sb.sb, false 1136 1137 bch2_write_super(c); 1138 mutex_unlock(&c->sb_lock); 1139 1140 return 0; 1141 err: 1142 bch_err_fn(c, ret); 1143 return ret; 1144 } 1145
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.