1 // SPDX-License-Identifier: GPL-2.0 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 #include "bcachefs.h" 3 #include "bcachefs.h" 4 #include "alloc_background.h" !! 4 #include "backpointers.h" 5 #include "bkey_buf.h" 5 #include "bkey_buf.h" >> 6 #include "alloc_background.h" >> 7 #include "btree_gc.h" 6 #include "btree_journal_iter.h" 8 #include "btree_journal_iter.h" 7 #include "btree_node_scan.h" << 8 #include "btree_update.h" 9 #include "btree_update.h" 9 #include "btree_update_interior.h" 10 #include "btree_update_interior.h" 10 #include "btree_io.h" 11 #include "btree_io.h" 11 #include "buckets.h" 12 #include "buckets.h" 12 #include "dirent.h" 13 #include "dirent.h" 13 #include "disk_accounting.h" !! 14 #include "ec.h" 14 #include "errcode.h" 15 #include "errcode.h" 15 #include "error.h" 16 #include "error.h" 16 #include "fs-common.h" 17 #include "fs-common.h" >> 18 #include "fsck.h" 17 #include "journal_io.h" 19 #include "journal_io.h" 18 #include "journal_reclaim.h" 20 #include "journal_reclaim.h" 19 #include "journal_seq_blacklist.h" 21 #include "journal_seq_blacklist.h" >> 22 #include "lru.h" 20 #include "logged_ops.h" 23 #include "logged_ops.h" 21 #include "move.h" 24 #include "move.h" 22 #include "quota.h" 25 #include "quota.h" 23 #include "rebalance.h" 26 #include "rebalance.h" 24 #include "recovery.h" 27 #include "recovery.h" 25 #include "recovery_passes.h" << 26 #include "replicas.h" 28 #include "replicas.h" 27 #include "sb-clean.h" 29 #include "sb-clean.h" 28 #include "sb-downgrade.h" 30 #include "sb-downgrade.h" 29 #include "snapshot.h" 31 #include "snapshot.h" >> 32 #include "subvolume.h" 30 #include "super-io.h" 33 #include "super-io.h" 31 34 32 #include <linux/sort.h> 35 #include <linux/sort.h> 33 #include <linux/stat.h> 36 #include <linux/stat.h> 34 37 35 #define QSTR(n) { { { .len = strlen(n) } }, .n 38 #define QSTR(n) { { { .len = strlen(n) } }, .name = n } 36 39 37 void bch2_btree_lost_data(struct bch_fs *c, en !! 40 static bool btree_id_is_alloc(enum btree_id id) 38 { 41 { 39 if (btree >= BTREE_ID_NR_MAX) !! 42 switch (id) { 40 return; !! 43 case BTREE_ID_alloc: 41 !! 44 case BTREE_ID_backpointers: 42 u64 b = BIT_ULL(btree); !! 45 case BTREE_ID_need_discard: 43 !! 46 case BTREE_ID_freespace: 44 if (!(c->sb.btrees_lost_data & b)) { !! 47 case BTREE_ID_bucket_gens: 45 bch_err(c, "flagging btree %s !! 48 return true; 46 !! 49 default: 47 mutex_lock(&c->sb_lock); !! 50 return false; 48 bch2_sb_field_get(c->disk_sb.s << 49 bch2_write_super(c); << 50 mutex_unlock(&c->sb_lock); << 51 } 51 } 52 } 52 } 53 53 54 /* for -o reconstruct_alloc: */ 54 /* for -o reconstruct_alloc: */ 55 static void bch2_reconstruct_alloc(struct bch_ !! 55 static void drop_alloc_keys(struct journal_keys *keys) 56 { 56 { 57 bch2_journal_log_msg(c, "dropping allo !! 57 size_t src, dst; 58 bch_info(c, "dropping and reconstructi << 59 58 60 mutex_lock(&c->sb_lock); !! 59 for (src = 0, dst = 0; src < keys->nr; src++) 61 struct bch_sb_field_ext *ext = bch2_sb !! 60 if (!btree_id_is_alloc(keys->d[src].btree_id)) 62 !! 61 keys->d[dst++] = keys->d[src]; 63 __set_bit_le64(BCH_RECOVERY_PASS_STABL << 64 __set_bit_le64(BCH_RECOVERY_PASS_STABL << 65 __set_bit_le64(BCH_RECOVERY_PASS_STABL << 66 __set_bit_le64(BCH_RECOVERY_PASS_STABL << 67 __set_bit_le64(BCH_RECOVERY_PASS_STABL << 68 << 69 __set_bit_le64(BCH_FSCK_ERR_ptr_to_mis << 70 __set_bit_le64(BCH_FSCK_ERR_ptr_gen_ne << 71 __set_bit_le64(BCH_FSCK_ERR_stale_dirt << 72 << 73 __set_bit_le64(BCH_FSCK_ERR_dev_usage_ << 74 __set_bit_le64(BCH_FSCK_ERR_dev_usage_ << 75 __set_bit_le64(BCH_FSCK_ERR_dev_usage_ << 76 << 77 __set_bit_le64(BCH_FSCK_ERR_fs_usage_b << 78 __set_bit_le64(BCH_FSCK_ERR_fs_usage_c << 79 __set_bit_le64(BCH_FSCK_ERR_fs_usage_p << 80 __set_bit_le64(BCH_FSCK_ERR_fs_usage_r << 81 << 82 __set_bit_le64(BCH_FSCK_ERR_alloc_key_ << 83 __set_bit_le64(BCH_FSCK_ERR_alloc_key_ << 84 __set_bit_le64(BCH_FSCK_ERR_alloc_key_ << 85 __set_bit_le64(BCH_FSCK_ERR_alloc_key_ << 86 __set_bit_le64(BCH_FSCK_ERR_alloc_key_ << 87 __set_bit_le64(BCH_FSCK_ERR_alloc_key_ << 88 __set_bit_le64(BCH_FSCK_ERR_need_disca << 89 __set_bit_le64(BCH_FSCK_ERR_freespace_ << 90 __set_bit_le64(BCH_FSCK_ERR_bucket_gen << 91 __set_bit_le64(BCH_FSCK_ERR_freespace_ << 92 __set_bit_le64(BCH_FSCK_ERR_ptr_to_mis << 93 __set_bit_le64(BCH_FSCK_ERR_lru_entry_ << 94 __set_bit_le64(BCH_FSCK_ERR_accounting << 95 c->sb.compat &= ~(1ULL << BCH_COMPAT_a << 96 << 97 c->opts.recovery_passes |= bch2_recove << 98 << 99 bch2_write_super(c); << 100 mutex_unlock(&c->sb_lock); << 101 62 102 bch2_shoot_down_journal_keys(c, BTREE_ !! 63 keys->nr = dst; 103 0, BTREE_ << 104 bch2_shoot_down_journal_keys(c, BTREE_ << 105 0, BTREE_ << 106 bch2_shoot_down_journal_keys(c, BTREE_ << 107 0, BTREE_ << 108 bch2_shoot_down_journal_keys(c, BTREE_ << 109 0, BTREE_ << 110 bch2_shoot_down_journal_keys(c, BTREE_ << 111 0, BTREE_ << 112 } 64 } 113 65 114 /* 66 /* 115 * Btree node pointers have a field to stack a 67 * Btree node pointers have a field to stack a pointer to the in memory btree 116 * node; we need to zero out this field when r 68 * node; we need to zero out this field when reading in btree nodes, or when 117 * reading in keys from the journal: 69 * reading in keys from the journal: 118 */ 70 */ 119 static void zero_out_btree_mem_ptr(struct jour 71 static void zero_out_btree_mem_ptr(struct journal_keys *keys) 120 { 72 { 121 darray_for_each(*keys, i) !! 73 struct journal_key *i; >> 74 >> 75 for (i = keys->d; i < keys->d + keys->nr; i++) 122 if (i->k->k.type == KEY_TYPE_b 76 if (i->k->k.type == KEY_TYPE_btree_ptr_v2) 123 bkey_i_to_btree_ptr_v2 77 bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0; 124 } 78 } 125 79 126 /* journal replay: */ 80 /* journal replay: */ 127 81 128 static void replay_now_at(struct journal *j, u 82 static void replay_now_at(struct journal *j, u64 seq) 129 { 83 { 130 BUG_ON(seq < j->replay_journal_seq); 84 BUG_ON(seq < j->replay_journal_seq); 131 85 132 seq = min(seq, j->replay_journal_seq_e 86 seq = min(seq, j->replay_journal_seq_end); 133 87 134 while (j->replay_journal_seq < seq) 88 while (j->replay_journal_seq < seq) 135 bch2_journal_pin_put(j, j->rep 89 bch2_journal_pin_put(j, j->replay_journal_seq++); 136 } 90 } 137 91 138 static int bch2_journal_replay_accounting_key( << 139 << 140 { << 141 struct btree_iter iter; << 142 bch2_trans_node_iter_init(trans, &iter << 143 BTREE_MAX_DE << 144 BTREE_ITER_i << 145 int ret = bch2_btree_iter_traverse(&it << 146 if (ret) << 147 goto out; << 148 << 149 struct bkey u; << 150 struct bkey_s_c old = bch2_btree_path_ << 151 << 152 /* Has this delta already been applied << 153 if (bversion_cmp(old.k->bversion, k->k << 154 ret = 0; << 155 goto out; << 156 } << 157 << 158 struct bkey_i *new = k->k; << 159 if (old.k->type == KEY_TYPE_accounting << 160 new = bch2_bkey_make_mut_noupd << 161 ret = PTR_ERR_OR_ZERO(new); << 162 if (ret) << 163 goto out; << 164 << 165 bch2_accounting_accumulate(bke << 166 bke << 167 } << 168 << 169 trans->journal_res.seq = k->journal_se << 170 << 171 ret = bch2_trans_update(trans, &iter, << 172 out: << 173 bch2_trans_iter_exit(trans, &iter); << 174 return ret; << 175 } << 176 << 177 static int bch2_journal_replay_key(struct btre 92 static int bch2_journal_replay_key(struct btree_trans *trans, 178 struct jour 93 struct journal_key *k) 179 { 94 { 180 struct btree_iter iter; 95 struct btree_iter iter; 181 unsigned iter_flags = 96 unsigned iter_flags = 182 BTREE_ITER_intent| !! 97 BTREE_ITER_INTENT| 183 BTREE_ITER_not_extents; !! 98 BTREE_ITER_NOT_EXTENTS; 184 unsigned update_flags = BTREE_TRIGGER_ !! 99 unsigned update_flags = BTREE_TRIGGER_NORUN; 185 int ret; 100 int ret; 186 101 187 if (k->overwritten) 102 if (k->overwritten) 188 return 0; 103 return 0; 189 104 190 trans->journal_res.seq = k->journal_se 105 trans->journal_res.seq = k->journal_seq; 191 106 192 /* 107 /* 193 * BTREE_UPDATE_key_cache_reclaim disa !! 108 * BTREE_UPDATE_KEY_CACHE_RECLAIM disables key cache lookup/update to 194 * keep the key cache coherent with th 109 * keep the key cache coherent with the underlying btree. Nothing 195 * besides the allocator is doing upda 110 * besides the allocator is doing updates yet so we don't need key cache 196 * coherency for non-alloc btrees, and 111 * coherency for non-alloc btrees, and key cache fills for snapshots 197 * btrees use BTREE_ITER_filter_snapsh !! 112 * btrees use BTREE_ITER_FILTER_SNAPSHOTS, which isn't available until 198 * the snapshots recovery pass runs. 113 * the snapshots recovery pass runs. 199 */ 114 */ 200 if (!k->level && k->btree_id == BTREE_ 115 if (!k->level && k->btree_id == BTREE_ID_alloc) 201 iter_flags |= BTREE_ITER_cache !! 116 iter_flags |= BTREE_ITER_CACHED; 202 else 117 else 203 update_flags |= BTREE_UPDATE_k !! 118 update_flags |= BTREE_UPDATE_KEY_CACHE_RECLAIM; 204 119 205 bch2_trans_node_iter_init(trans, &iter 120 bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, 206 BTREE_MAX_DE 121 BTREE_MAX_DEPTH, k->level, 207 iter_flags); 122 iter_flags); 208 ret = bch2_btree_iter_traverse(&iter); 123 ret = bch2_btree_iter_traverse(&iter); 209 if (ret) 124 if (ret) 210 goto out; 125 goto out; 211 126 212 struct btree_path *path = btree_iter_p << 213 if (unlikely(!btree_path_node(path, k- << 214 bch2_trans_iter_exit(trans, &i << 215 bch2_trans_node_iter_init(tran << 216 BTRE << 217 ret = bch2_btree_iter_traver << 218 bch2_btree_increase_de << 219 -BCH_ERR_transaction_r << 220 goto out; << 221 } << 222 << 223 /* Must be checked with btree locked: 127 /* Must be checked with btree locked: */ 224 if (k->overwritten) 128 if (k->overwritten) 225 goto out; 129 goto out; 226 130 227 if (k->k->k.type == KEY_TYPE_accountin << 228 ret = bch2_trans_update_buffer << 229 goto out; << 230 } << 231 << 232 ret = bch2_trans_update(trans, &iter, 131 ret = bch2_trans_update(trans, &iter, k->k, update_flags); 233 out: 132 out: 234 bch2_trans_iter_exit(trans, &iter); 133 bch2_trans_iter_exit(trans, &iter); 235 return ret; 134 return ret; 236 } 135 } 237 136 238 static int journal_sort_seq_cmp(const void *_l 137 static int journal_sort_seq_cmp(const void *_l, const void *_r) 239 { 138 { 240 const struct journal_key *l = *((const 139 const struct journal_key *l = *((const struct journal_key **)_l); 241 const struct journal_key *r = *((const 140 const struct journal_key *r = *((const struct journal_key **)_r); 242 141 243 /* !! 142 return cmp_int(l->journal_seq, r->journal_seq); 244 * Map 0 to U64_MAX, so that keys with << 245 * << 246 * journal_seq == 0 means that the key << 247 * should be inserted last so as to av << 248 */ << 249 return cmp_int(l->journal_seq - 1, r-> << 250 } 143 } 251 144 252 int bch2_journal_replay(struct bch_fs *c) !! 145 static int bch2_journal_replay(struct bch_fs *c) 253 { 146 { 254 struct journal_keys *keys = &c->journa 147 struct journal_keys *keys = &c->journal_keys; 255 DARRAY(struct journal_key *) keys_sort 148 DARRAY(struct journal_key *) keys_sorted = { 0 }; 256 struct journal *j = &c->journal; 149 struct journal *j = &c->journal; 257 u64 start_seq = c->journal_replay_se 150 u64 start_seq = c->journal_replay_seq_start; 258 u64 end_seq = c->journal_replay_se 151 u64 end_seq = c->journal_replay_seq_start; 259 struct btree_trans *trans = NULL; !! 152 struct btree_trans *trans = bch2_trans_get(c); 260 bool immediate_flush = false; << 261 int ret = 0; 153 int ret = 0; 262 154 263 if (keys->nr) { 155 if (keys->nr) { 264 ret = bch2_journal_log_msg(c, 156 ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", 265 key 157 keys->nr, start_seq, end_seq); 266 if (ret) 158 if (ret) 267 goto err; 159 goto err; 268 } 160 } 269 161 270 BUG_ON(!atomic_read(&keys->ref)); 162 BUG_ON(!atomic_read(&keys->ref)); 271 163 272 move_gap(keys, keys->nr); << 273 trans = bch2_trans_get(c); << 274 << 275 /* << 276 * Replay accounting keys first: we ca << 277 * flush accounting keys until we're d << 278 */ << 279 darray_for_each(*keys, k) { << 280 if (!(k->k->k.type == KEY_TYPE << 281 continue; << 282 << 283 cond_resched(); << 284 << 285 ret = commit_do(trans, NULL, N << 286 BCH_TRANS_COMM << 287 BCH_TRANS_COMM << 288 BCH_TRANS_COMM << 289 BCH_TRANS_COMM << 290 BCH_WATERMARK_ << 291 bch2_journal_repl << 292 if (bch2_fs_fatal_err_on(ret, << 293 goto err; << 294 << 295 k->overwritten = true; << 296 } << 297 << 298 set_bit(BCH_FS_accounting_replay_done, << 299 << 300 /* 164 /* 301 * First, attempt to replay keys in so 165 * First, attempt to replay keys in sorted order. This is more 302 * efficient - better locality of btre 166 * efficient - better locality of btree access - but some might fail if 303 * that would cause a journal deadlock 167 * that would cause a journal deadlock. 304 */ 168 */ 305 darray_for_each(*keys, k) { !! 169 for (size_t i = 0; i < keys->nr; i++) { 306 cond_resched(); 170 cond_resched(); 307 171 308 /* !! 172 struct journal_key *k = keys->d + i; 309 * k->allocated means the key << 310 * rather it was from early re << 311 */ << 312 if (k->allocated) << 313 immediate_flush = true << 314 173 315 /* Skip fastpath if we're low 174 /* Skip fastpath if we're low on space in the journal */ 316 ret = c->journal.watermark ? - 175 ret = c->journal.watermark ? -1 : 317 commit_do(trans, NULL, 176 commit_do(trans, NULL, NULL, 318 BCH_TRANS_CO 177 BCH_TRANS_COMMIT_no_enospc| 319 BCH_TRANS_CO 178 BCH_TRANS_COMMIT_journal_reclaim| 320 BCH_TRANS_CO << 321 (!k->allocat 179 (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0), 322 bch2_journal_repl 180 bch2_journal_replay_key(trans, k)); 323 BUG_ON(!ret && !k->overwritten !! 181 BUG_ON(!ret && !k->overwritten); 324 if (ret) { 182 if (ret) { 325 ret = darray_push(&key 183 ret = darray_push(&keys_sorted, k); 326 if (ret) 184 if (ret) 327 goto err; 185 goto err; 328 } 186 } 329 } 187 } 330 188 331 bch2_trans_unlock_long(trans); << 332 /* 189 /* 333 * Now, replay any remaining keys in t 190 * Now, replay any remaining keys in the order in which they appear in 334 * the journal, unpinning those journa 191 * the journal, unpinning those journal entries as we go: 335 */ 192 */ 336 sort(keys_sorted.data, keys_sorted.nr, 193 sort(keys_sorted.data, keys_sorted.nr, 337 sizeof(keys_sorted.data[0]), 194 sizeof(keys_sorted.data[0]), 338 journal_sort_seq_cmp, NULL); 195 journal_sort_seq_cmp, NULL); 339 196 340 darray_for_each(keys_sorted, kp) { 197 darray_for_each(keys_sorted, kp) { 341 cond_resched(); 198 cond_resched(); 342 199 343 struct journal_key *k = *kp; 200 struct journal_key *k = *kp; 344 201 345 if (k->journal_seq) !! 202 replay_now_at(j, k->journal_seq); 346 replay_now_at(j, k->jo << 347 else << 348 replay_now_at(j, j->re << 349 203 350 ret = commit_do(trans, NULL, N 204 ret = commit_do(trans, NULL, NULL, 351 BCH_TRANS_COMM 205 BCH_TRANS_COMMIT_no_enospc| 352 BCH_TRANS_COMM << 353 (!k->allocated 206 (!k->allocated 354 ? BCH_TRANS_C 207 ? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim 355 : 0), 208 : 0), 356 bch2_journal_repl 209 bch2_journal_replay_key(trans, k)); 357 bch_err_msg(c, ret, "while rep 210 bch_err_msg(c, ret, "while replaying key at btree %s level %u:", 358 bch2_btree_id_str( 211 bch2_btree_id_str(k->btree_id), k->level); 359 if (ret) 212 if (ret) 360 goto err; 213 goto err; 361 214 362 BUG_ON(k->btree_id != BTREE_ID !! 215 BUG_ON(!k->overwritten); 363 } 216 } 364 217 365 /* 218 /* 366 * We need to put our btree_trans befo 219 * We need to put our btree_trans before calling flush_all_pins(), since 367 * that will use a btree_trans interna 220 * that will use a btree_trans internally 368 */ 221 */ 369 bch2_trans_put(trans); 222 bch2_trans_put(trans); 370 trans = NULL; 223 trans = NULL; 371 224 372 if (!c->opts.retain_recovery_info && !! 225 if (!c->opts.keep_journal) 373 c->recovery_pass_done >= BCH_RECOV << 374 bch2_journal_keys_put_initial( 226 bch2_journal_keys_put_initial(c); 375 227 376 replay_now_at(j, j->replay_journal_seq 228 replay_now_at(j, j->replay_journal_seq_end); 377 j->replay_journal_seq = 0; 229 j->replay_journal_seq = 0; 378 230 379 bch2_journal_set_replay_done(j); 231 bch2_journal_set_replay_done(j); 380 232 381 /* if we did any repair, flush it imme << 382 if (immediate_flush) { << 383 bch2_journal_flush_all_pins(&c << 384 ret = bch2_journal_meta(&c->jo << 385 } << 386 << 387 if (keys->nr) 233 if (keys->nr) 388 bch2_journal_log_msg(c, "journ 234 bch2_journal_log_msg(c, "journal replay finished"); 389 err: 235 err: 390 if (trans) 236 if (trans) 391 bch2_trans_put(trans); 237 bch2_trans_put(trans); 392 darray_exit(&keys_sorted); 238 darray_exit(&keys_sorted); 393 bch_err_fn(c, ret); 239 bch_err_fn(c, ret); 394 return ret; 240 return ret; 395 } 241 } 396 242 397 /* journal replay early: */ 243 /* journal replay early: */ 398 244 399 static int journal_replay_entry_early(struct b 245 static int journal_replay_entry_early(struct bch_fs *c, 400 struct j 246 struct jset_entry *entry) 401 { 247 { 402 int ret = 0; 248 int ret = 0; 403 249 404 switch (entry->type) { 250 switch (entry->type) { 405 case BCH_JSET_ENTRY_btree_root: { 251 case BCH_JSET_ENTRY_btree_root: { 406 struct btree_root *r; 252 struct btree_root *r; 407 253 408 if (fsck_err_on(entry->btree_i << 409 c, invalid_btr << 410 "invalid btree << 411 entry->btree_i << 412 return 0; << 413 << 414 while (entry->btree_id >= c->b 254 while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) { 415 ret = darray_push(&c-> 255 ret = darray_push(&c->btree_roots_extra, (struct btree_root) { NULL }); 416 if (ret) 256 if (ret) 417 return ret; 257 return ret; 418 } 258 } 419 259 420 r = bch2_btree_id_root(c, entr 260 r = bch2_btree_id_root(c, entry->btree_id); 421 261 422 if (entry->u64s) { 262 if (entry->u64s) { 423 r->level = entry->leve 263 r->level = entry->level; 424 bkey_copy(&r->key, (st 264 bkey_copy(&r->key, (struct bkey_i *) entry->start); 425 r->error = 0; 265 r->error = 0; 426 } else { 266 } else { 427 r->error = -BCH_ERR_bt !! 267 r->error = -EIO; 428 } 268 } 429 r->alive = true; 269 r->alive = true; 430 break; 270 break; 431 } 271 } 432 case BCH_JSET_ENTRY_usage: { 272 case BCH_JSET_ENTRY_usage: { 433 struct jset_entry_usage *u = 273 struct jset_entry_usage *u = 434 container_of(entry, st 274 container_of(entry, struct jset_entry_usage, entry); 435 275 436 switch (entry->btree_id) { 276 switch (entry->btree_id) { >> 277 case BCH_FS_USAGE_reserved: >> 278 if (entry->level < BCH_REPLICAS_MAX) >> 279 c->usage_base->persistent_reserved[entry->level] = >> 280 le64_to_cpu(u->v); >> 281 break; >> 282 case BCH_FS_USAGE_inodes: >> 283 c->usage_base->b.nr_inodes = le64_to_cpu(u->v); >> 284 break; 437 case BCH_FS_USAGE_key_version: 285 case BCH_FS_USAGE_key_version: 438 atomic64_set(&c->key_v !! 286 atomic64_set(&c->key_version, >> 287 le64_to_cpu(u->v)); 439 break; 288 break; 440 } 289 } >> 290 >> 291 break; >> 292 } >> 293 case BCH_JSET_ENTRY_data_usage: { >> 294 struct jset_entry_data_usage *u = >> 295 container_of(entry, struct jset_entry_data_usage, entry); >> 296 >> 297 ret = bch2_replicas_set_usage(c, &u->r, >> 298 le64_to_cpu(u->v)); >> 299 break; >> 300 } >> 301 case BCH_JSET_ENTRY_dev_usage: { >> 302 struct jset_entry_dev_usage *u = >> 303 container_of(entry, struct jset_entry_dev_usage, entry); >> 304 struct bch_dev *ca = bch_dev_bkey_exists(c, le32_to_cpu(u->dev)); >> 305 unsigned i, nr_types = jset_entry_dev_usage_nr_types(u); >> 306 >> 307 for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) { >> 308 ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets); >> 309 ca->usage_base->d[i].sectors = le64_to_cpu(u->d[i].sectors); >> 310 ca->usage_base->d[i].fragmented = le64_to_cpu(u->d[i].fragmented); >> 311 } >> 312 441 break; 313 break; 442 } 314 } 443 case BCH_JSET_ENTRY_blacklist: { 315 case BCH_JSET_ENTRY_blacklist: { 444 struct jset_entry_blacklist *b 316 struct jset_entry_blacklist *bl_entry = 445 container_of(entry, st 317 container_of(entry, struct jset_entry_blacklist, entry); 446 318 447 ret = bch2_journal_seq_blackli 319 ret = bch2_journal_seq_blacklist_add(c, 448 le64_to_cpu(bl 320 le64_to_cpu(bl_entry->seq), 449 le64_to_cpu(bl 321 le64_to_cpu(bl_entry->seq) + 1); 450 break; 322 break; 451 } 323 } 452 case BCH_JSET_ENTRY_blacklist_v2: { 324 case BCH_JSET_ENTRY_blacklist_v2: { 453 struct jset_entry_blacklist_v2 325 struct jset_entry_blacklist_v2 *bl_entry = 454 container_of(entry, st 326 container_of(entry, struct jset_entry_blacklist_v2, entry); 455 327 456 ret = bch2_journal_seq_blackli 328 ret = bch2_journal_seq_blacklist_add(c, 457 le64_to_cpu(bl 329 le64_to_cpu(bl_entry->start), 458 le64_to_cpu(bl 330 le64_to_cpu(bl_entry->end) + 1); 459 break; 331 break; 460 } 332 } 461 case BCH_JSET_ENTRY_clock: { 333 case BCH_JSET_ENTRY_clock: { 462 struct jset_entry_clock *clock 334 struct jset_entry_clock *clock = 463 container_of(entry, st 335 container_of(entry, struct jset_entry_clock, entry); 464 336 465 atomic64_set(&c->io_clock[cloc 337 atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time)); 466 } 338 } 467 } 339 } 468 fsck_err: !! 340 469 return ret; 341 return ret; 470 } 342 } 471 343 472 static int journal_replay_early(struct bch_fs 344 static int journal_replay_early(struct bch_fs *c, 473 struct bch_sb_ 345 struct bch_sb_field_clean *clean) 474 { 346 { 475 if (clean) { 347 if (clean) { 476 for (struct jset_entry *entry 348 for (struct jset_entry *entry = clean->start; 477 entry != vstruct_end(&cle 349 entry != vstruct_end(&clean->field); 478 entry = vstruct_next(entr 350 entry = vstruct_next(entry)) { 479 int ret = journal_repl 351 int ret = journal_replay_entry_early(c, entry); 480 if (ret) 352 if (ret) 481 return ret; 353 return ret; 482 } 354 } 483 } else { 355 } else { 484 struct genradix_iter iter; 356 struct genradix_iter iter; 485 struct journal_replay *i, **_i 357 struct journal_replay *i, **_i; 486 358 487 genradix_for_each(&c->journal_ 359 genradix_for_each(&c->journal_entries, iter, _i) { 488 i = *_i; 360 i = *_i; 489 361 490 if (journal_replay_ign !! 362 if (!i || i->ignore) 491 continue; 363 continue; 492 364 493 vstruct_for_each(&i->j 365 vstruct_for_each(&i->j, entry) { 494 int ret = jour 366 int ret = journal_replay_entry_early(c, entry); 495 if (ret) 367 if (ret) 496 return 368 return ret; 497 } 369 } 498 } 370 } 499 } 371 } 500 372 >> 373 bch2_fs_usage_initialize(c); >> 374 501 return 0; 375 return 0; 502 } 376 } 503 377 504 /* sb clean section: */ 378 /* sb clean section: */ 505 379 506 static int read_btree_roots(struct bch_fs *c) 380 static int read_btree_roots(struct bch_fs *c) 507 { 381 { >> 382 unsigned i; 508 int ret = 0; 383 int ret = 0; 509 384 510 for (unsigned i = 0; i < btree_id_nr_a !! 385 for (i = 0; i < btree_id_nr_alive(c); i++) { 511 struct btree_root *r = bch2_bt 386 struct btree_root *r = bch2_btree_id_root(c, i); 512 387 513 if (!r->alive) 388 if (!r->alive) 514 continue; 389 continue; 515 390 516 if (btree_id_is_alloc(i) && c- !! 391 if (btree_id_is_alloc(i) && >> 392 c->opts.reconstruct_alloc) { >> 393 c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); 517 continue; 394 continue; >> 395 } 518 396 519 if (mustfix_fsck_err_on((ret = !! 397 if (r->error) { 520 c, btr !! 398 __fsck_err(c, 521 "inval !! 399 btree_id_is_alloc(i) 522 bch2_b !! 400 ? FSCK_CAN_IGNORE : 0, 523 mustfix_fsck_err_on((ret = !! 401 btree_root_bkey_invalid, 524 c, btr !! 402 "invalid btree root %s", 525 "error !! 403 bch2_btree_id_str(i)); 526 bch2_b !! 404 if (i == BTREE_ID_alloc) 527 if (btree_id_is_alloc( << 528 c->opts.recove << 529 c->opts.recove << 530 c->opts.recove << 531 c->opts.recove << 532 c->opts.recove << 533 c->sb.compat & 405 c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); 534 r->error = 0; !! 406 } 535 } else if (!(c->opts.r << 536 bch_info(c, "w << 537 c->opts.recove << 538 c->opts.recove << 539 } << 540 407 >> 408 ret = bch2_btree_root_read(c, i, &r->key, r->level); >> 409 if (ret) { >> 410 fsck_err(c, >> 411 btree_root_read_error, >> 412 "error reading btree root %s", >> 413 bch2_btree_id_str(i)); >> 414 if (btree_id_is_alloc(i)) >> 415 c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); 541 ret = 0; 416 ret = 0; 542 bch2_btree_lost_data(c << 543 } 417 } 544 } 418 } 545 419 546 for (unsigned i = 0; i < BTREE_ID_NR; !! 420 for (i = 0; i < BTREE_ID_NR; i++) { 547 struct btree_root *r = bch2_bt 421 struct btree_root *r = bch2_btree_id_root(c, i); 548 422 549 if (!r->b && !r->error) { !! 423 if (!r->b) { 550 r->alive = false; 424 r->alive = false; 551 r->level = 0; 425 r->level = 0; 552 bch2_btree_root_alloc_ !! 426 bch2_btree_root_alloc(c, i); 553 } 427 } 554 } 428 } 555 fsck_err: 429 fsck_err: 556 return ret; 430 return ret; 557 } 431 } 558 432 >> 433 static int bch2_initialize_subvolumes(struct bch_fs *c) >> 434 { >> 435 struct bkey_i_snapshot_tree root_tree; >> 436 struct bkey_i_snapshot root_snapshot; >> 437 struct bkey_i_subvolume root_volume; >> 438 int ret; >> 439 >> 440 bkey_snapshot_tree_init(&root_tree.k_i); >> 441 root_tree.k.p.offset = 1; >> 442 root_tree.v.master_subvol = cpu_to_le32(1); >> 443 root_tree.v.root_snapshot = cpu_to_le32(U32_MAX); >> 444 >> 445 bkey_snapshot_init(&root_snapshot.k_i); >> 446 root_snapshot.k.p.offset = U32_MAX; >> 447 root_snapshot.v.flags = 0; >> 448 root_snapshot.v.parent = 0; >> 449 root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL); >> 450 root_snapshot.v.tree = cpu_to_le32(1); >> 451 SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true); >> 452 >> 453 bkey_subvolume_init(&root_volume.k_i); >> 454 root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL; >> 455 root_volume.v.flags = 0; >> 456 root_volume.v.snapshot = cpu_to_le32(U32_MAX); >> 457 root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO); >> 458 >> 459 ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?: >> 460 bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?: >> 461 bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0); >> 462 bch_err_fn(c, ret); >> 463 return ret; >> 464 } >> 465 >> 466 static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) >> 467 { >> 468 struct btree_iter iter; >> 469 struct bkey_s_c k; >> 470 struct bch_inode_unpacked inode; >> 471 int ret; >> 472 >> 473 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, >> 474 SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0); >> 475 ret = bkey_err(k); >> 476 if (ret) >> 477 return ret; >> 478 >> 479 if (!bkey_is_inode(k.k)) { >> 480 bch_err(trans->c, "root inode not found"); >> 481 ret = -BCH_ERR_ENOENT_inode; >> 482 goto err; >> 483 } >> 484 >> 485 ret = bch2_inode_unpack(k, &inode); >> 486 BUG_ON(ret); >> 487 >> 488 inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; >> 489 >> 490 ret = bch2_inode_write(trans, &iter, &inode); >> 491 err: >> 492 bch2_trans_iter_exit(trans, &iter); >> 493 return ret; >> 494 } >> 495 >> 496 /* set bi_subvol on root inode */ >> 497 noinline_for_stack >> 498 static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) >> 499 { >> 500 int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw, >> 501 __bch2_fs_upgrade_for_subvolumes(trans)); >> 502 bch_err_fn(c, ret); >> 503 return ret; >> 504 } >> 505 >> 506 const char * const bch2_recovery_passes[] = { >> 507 #define x(_fn, ...) #_fn, >> 508 BCH_RECOVERY_PASSES() >> 509 #undef x >> 510 NULL >> 511 }; >> 512 >> 513 static int bch2_check_allocations(struct bch_fs *c) >> 514 { >> 515 return bch2_gc(c, true, c->opts.norecovery); >> 516 } >> 517 >> 518 static int bch2_set_may_go_rw(struct bch_fs *c) >> 519 { >> 520 struct journal_keys *keys = &c->journal_keys; >> 521 >> 522 /* >> 523 * After we go RW, the journal keys buffer can't be modified (except for >> 524 * setting journal_key->overwritten: it will be accessed by multiple >> 525 * threads >> 526 */ >> 527 move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); >> 528 keys->gap = keys->nr; >> 529 >> 530 set_bit(BCH_FS_may_go_rw, &c->flags); >> 531 >> 532 if (keys->nr || c->opts.fsck || !c->sb.clean) >> 533 return bch2_fs_read_write_early(c); >> 534 return 0; >> 535 } >> 536 >> 537 struct recovery_pass_fn { >> 538 int (*fn)(struct bch_fs *); >> 539 unsigned when; >> 540 }; >> 541 >> 542 static struct recovery_pass_fn recovery_pass_fns[] = { >> 543 #define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when }, >> 544 BCH_RECOVERY_PASSES() >> 545 #undef x >> 546 }; >> 547 >> 548 u64 bch2_recovery_passes_to_stable(u64 v) >> 549 { >> 550 static const u8 map[] = { >> 551 #define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n, >> 552 BCH_RECOVERY_PASSES() >> 553 #undef x >> 554 }; >> 555 >> 556 u64 ret = 0; >> 557 for (unsigned i = 0; i < ARRAY_SIZE(map); i++) >> 558 if (v & BIT_ULL(i)) >> 559 ret |= BIT_ULL(map[i]); >> 560 return ret; >> 561 } >> 562 >> 563 u64 bch2_recovery_passes_from_stable(u64 v) >> 564 { >> 565 static const u8 map[] = { >> 566 #define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n, >> 567 BCH_RECOVERY_PASSES() >> 568 #undef x >> 569 }; >> 570 >> 571 u64 ret = 0; >> 572 for (unsigned i = 0; i < ARRAY_SIZE(map); i++) >> 573 if (v & BIT_ULL(i)) >> 574 ret |= BIT_ULL(map[i]); >> 575 return ret; >> 576 } >> 577 559 static bool check_version_upgrade(struct bch_f 578 static bool check_version_upgrade(struct bch_fs *c) 560 { 579 { 561 unsigned latest_version = bcachefs_met 580 unsigned latest_version = bcachefs_metadata_version_current; 562 unsigned latest_compatible = min(lates 581 unsigned latest_compatible = min(latest_version, 563 bch2_ 582 bch2_latest_compatible_version(c->sb.version)); 564 unsigned old_version = c->sb.version_u 583 unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; 565 unsigned new_version = 0; 584 unsigned new_version = 0; 566 585 567 if (old_version < bcachefs_metadata_re 586 if (old_version < bcachefs_metadata_required_upgrade_below) { 568 if (c->opts.version_upgrade == 587 if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || 569 latest_compatible < bcache 588 latest_compatible < bcachefs_metadata_required_upgrade_below) 570 new_version = latest_v 589 new_version = latest_version; 571 else 590 else 572 new_version = latest_c 591 new_version = latest_compatible; 573 } else { 592 } else { 574 switch (c->opts.version_upgrad 593 switch (c->opts.version_upgrade) { 575 case BCH_VERSION_UPGRADE_compa 594 case BCH_VERSION_UPGRADE_compatible: 576 new_version = latest_c 595 new_version = latest_compatible; 577 break; 596 break; 578 case BCH_VERSION_UPGRADE_incom 597 case BCH_VERSION_UPGRADE_incompatible: 579 new_version = latest_v 598 new_version = latest_version; 580 break; 599 break; 581 case BCH_VERSION_UPGRADE_none: 600 case BCH_VERSION_UPGRADE_none: 582 new_version = min(old_ 601 new_version = min(old_version, latest_version); 583 break; 602 break; 584 } 603 } 585 } 604 } 586 605 587 if (new_version > old_version) { 606 if (new_version > old_version) { 588 struct printbuf buf = PRINTBUF 607 struct printbuf buf = PRINTBUF; 589 608 590 if (old_version < bcachefs_met 609 if (old_version < bcachefs_metadata_required_upgrade_below) 591 prt_str(&buf, "Version 610 prt_str(&buf, "Version upgrade required:\n"); 592 611 593 if (old_version != c->sb.versi 612 if (old_version != c->sb.version) { 594 prt_str(&buf, "Version 613 prt_str(&buf, "Version upgrade from "); 595 bch2_version_to_text(& 614 bch2_version_to_text(&buf, c->sb.version_upgrade_complete); 596 prt_str(&buf, " to "); 615 prt_str(&buf, " to "); 597 bch2_version_to_text(& 616 bch2_version_to_text(&buf, c->sb.version); 598 prt_str(&buf, " incomp 617 prt_str(&buf, " incomplete\n"); 599 } 618 } 600 619 601 prt_printf(&buf, "Doing %s ver 620 prt_printf(&buf, "Doing %s version upgrade from ", 602 BCH_VERSION_MAJOR(o 621 BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version) 603 ? "incompatible" : 622 ? "incompatible" : "compatible"); 604 bch2_version_to_text(&buf, old 623 bch2_version_to_text(&buf, old_version); 605 prt_str(&buf, " to "); 624 prt_str(&buf, " to "); 606 bch2_version_to_text(&buf, new 625 bch2_version_to_text(&buf, new_version); 607 prt_newline(&buf); 626 prt_newline(&buf); 608 627 609 struct bch_sb_field_ext *ext = 628 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); 610 __le64 passes = ext->recovery_ 629 __le64 passes = ext->recovery_passes_required[0]; 611 bch2_sb_set_upgrade(c, old_ver 630 bch2_sb_set_upgrade(c, old_version, new_version); 612 passes = ext->recovery_passes_ 631 passes = ext->recovery_passes_required[0] & ~passes; 613 632 614 if (passes) { 633 if (passes) { 615 prt_str(&buf, " runni 634 prt_str(&buf, " running recovery passes: "); 616 prt_bitflags(&buf, bch 635 prt_bitflags(&buf, bch2_recovery_passes, 617 bch2_reco 636 bch2_recovery_passes_from_stable(le64_to_cpu(passes))); 618 } 637 } 619 638 620 bch_info(c, "%s", buf.buf); 639 bch_info(c, "%s", buf.buf); 621 640 622 bch2_sb_upgrade(c, new_version 641 bch2_sb_upgrade(c, new_version); 623 642 624 printbuf_exit(&buf); 643 printbuf_exit(&buf); 625 return true; 644 return true; 626 } 645 } 627 646 628 return false; 647 return false; 629 } 648 } 630 649 >> 650 u64 bch2_fsck_recovery_passes(void) >> 651 { >> 652 u64 ret = 0; >> 653 >> 654 for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) >> 655 if (recovery_pass_fns[i].when & PASS_FSCK) >> 656 ret |= BIT_ULL(i); >> 657 return ret; >> 658 } >> 659 >> 660 static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) >> 661 { >> 662 struct recovery_pass_fn *p = recovery_pass_fns + pass; >> 663 >> 664 if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read) >> 665 return false; >> 666 if (c->recovery_passes_explicit & BIT_ULL(pass)) >> 667 return true; >> 668 if ((p->when & PASS_FSCK) && c->opts.fsck) >> 669 return true; >> 670 if ((p->when & PASS_UNCLEAN) && !c->sb.clean) >> 671 return true; >> 672 if (p->when & PASS_ALWAYS) >> 673 return true; >> 674 return false; >> 675 } >> 676 >> 677 static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) >> 678 { >> 679 struct recovery_pass_fn *p = recovery_pass_fns + pass; >> 680 int ret; >> 681 >> 682 if (!(p->when & PASS_SILENT)) >> 683 bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."), >> 684 bch2_recovery_passes[pass]); >> 685 ret = p->fn(c); >> 686 if (ret) >> 687 return ret; >> 688 if (!(p->when & PASS_SILENT)) >> 689 bch2_print(c, KERN_CONT " done\n"); >> 690 >> 691 return 0; >> 692 } >> 693 >> 694 static int bch2_run_recovery_passes(struct bch_fs *c) >> 695 { >> 696 int ret = 0; >> 697 >> 698 while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { >> 699 if (should_run_recovery_pass(c, c->curr_recovery_pass)) { >> 700 unsigned pass = c->curr_recovery_pass; >> 701 >> 702 ret = bch2_run_recovery_pass(c, c->curr_recovery_pass); >> 703 if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || >> 704 (ret && c->curr_recovery_pass < pass)) >> 705 continue; >> 706 if (ret) >> 707 break; >> 708 >> 709 c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); >> 710 } >> 711 c->curr_recovery_pass++; >> 712 c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); >> 713 } >> 714 >> 715 return ret; >> 716 } >> 717 >> 718 int bch2_run_online_recovery_passes(struct bch_fs *c) >> 719 { >> 720 int ret = 0; >> 721 >> 722 for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) { >> 723 struct recovery_pass_fn *p = recovery_pass_fns + i; >> 724 >> 725 if (!(p->when & PASS_ONLINE)) >> 726 continue; >> 727 >> 728 ret = bch2_run_recovery_pass(c, i); >> 729 if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) { >> 730 i = c->curr_recovery_pass; >> 731 continue; >> 732 } >> 733 if (ret) >> 734 break; >> 735 } >> 736 >> 737 return ret; >> 738 } >> 739 631 int bch2_fs_recovery(struct bch_fs *c) 740 int bch2_fs_recovery(struct bch_fs *c) 632 { 741 { 633 struct bch_sb_field_clean *clean = NUL 742 struct bch_sb_field_clean *clean = NULL; 634 struct jset *last_journal_entry = NULL 743 struct jset *last_journal_entry = NULL; 635 u64 last_seq = 0, blacklist_seq, journ 744 u64 last_seq = 0, blacklist_seq, journal_seq; 636 int ret = 0; 745 int ret = 0; 637 746 638 if (c->sb.clean) { 747 if (c->sb.clean) { 639 clean = bch2_read_superblock_c 748 clean = bch2_read_superblock_clean(c); 640 ret = PTR_ERR_OR_ZERO(clean); 749 ret = PTR_ERR_OR_ZERO(clean); 641 if (ret) 750 if (ret) 642 goto err; 751 goto err; 643 752 644 bch_info(c, "recovering from c 753 bch_info(c, "recovering from clean shutdown, journal seq %llu", 645 le64_to_cpu(clean->jo 754 le64_to_cpu(clean->journal_seq)); 646 } else { 755 } else { 647 bch_info(c, "recovering from u 756 bch_info(c, "recovering from unclean shutdown"); 648 } 757 } 649 758 650 if (!(c->sb.features & (1ULL << BCH_FE 759 if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) { 651 bch_err(c, "feature new_extent 760 bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported"); 652 ret = -EINVAL; 761 ret = -EINVAL; 653 goto err; 762 goto err; 654 } 763 } 655 764 656 if (!c->sb.clean && 765 if (!c->sb.clean && 657 !(c->sb.features & (1ULL << BCH_FE 766 !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) { 658 bch_err(c, "filesystem needs r 767 bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix"); 659 ret = -EINVAL; 768 ret = -EINVAL; 660 goto err; 769 goto err; 661 } 770 } 662 771 663 if (c->opts.norecovery) !! 772 if (c->opts.fsck && c->opts.norecovery) { 664 c->opts.recovery_pass_last = B !! 773 bch_err(c, "cannot select both norecovery and fsck"); 665 !! 774 ret = -EINVAL; 666 mutex_lock(&c->sb_lock); !! 775 goto err; 667 struct bch_sb_field_ext *ext = bch2_sb << 668 bool write_sb = false; << 669 << 670 if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk << 671 ext->recovery_passes_required[ << 672 cpu_to_le64(bch2_recov << 673 write_sb = true; << 674 } 776 } 675 777 676 u64 sb_passes = bch2_recovery_passes_f !! 778 if (!c->opts.nochanges) { 677 if (sb_passes) { !! 779 mutex_lock(&c->sb_lock); 678 struct printbuf buf = PRINTBUF !! 780 bool write_sb = false; 679 prt_str(&buf, "superblock requ << 680 prt_bitflags(&buf, bch2_recove << 681 bch_info(c, "%s", buf.buf); << 682 printbuf_exit(&buf); << 683 } << 684 781 685 if (bch2_check_version_downgrade(c)) { !! 782 struct bch_sb_field_ext *ext = 686 struct printbuf buf = PRINTBUF !! 783 bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64)); >> 784 if (!ext) { >> 785 ret = -BCH_ERR_ENOSPC_sb; >> 786 mutex_unlock(&c->sb_lock); >> 787 goto err; >> 788 } 687 789 688 prt_str(&buf, "Version downgra !! 790 if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) { >> 791 ext->recovery_passes_required[0] |= >> 792 cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology))); >> 793 write_sb = true; >> 794 } 689 795 690 __le64 passes = ext->recovery_ !! 796 u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); 691 bch2_sb_set_downgrade(c, !! 797 if (sb_passes) { 692 BCH_VERS !! 798 struct printbuf buf = PRINTBUF; 693 BCH_VERS !! 799 prt_str(&buf, "superblock requires following recovery passes to be run:\n "); 694 passes = ext->recovery_passes_ !! 800 prt_bitflags(&buf, bch2_recovery_passes, sb_passes); 695 if (passes) { !! 801 bch_info(c, "%s", buf.buf); 696 prt_str(&buf, "\n run !! 802 printbuf_exit(&buf); 697 prt_bitflags(&buf, bch << 698 bch2_reco << 699 } 803 } 700 804 701 bch_info(c, "%s", buf.buf); !! 805 if (bch2_check_version_downgrade(c)) { 702 printbuf_exit(&buf); !! 806 struct printbuf buf = PRINTBUF; 703 write_sb = true; << 704 } << 705 807 706 if (check_version_upgrade(c)) !! 808 prt_str(&buf, "Version downgrade required:"); 707 write_sb = true; << 708 809 709 c->opts.recovery_passes |= bch2_recove !! 810 __le64 passes = ext->recovery_passes_required[0]; >> 811 bch2_sb_set_downgrade(c, >> 812 BCH_VERSION_MINOR(bcachefs_metadata_version_current), >> 813 BCH_VERSION_MINOR(c->sb.version)); >> 814 passes = ext->recovery_passes_required[0] & ~passes; >> 815 if (passes) { >> 816 prt_str(&buf, "\n running recovery passes: "); >> 817 prt_bitflags(&buf, bch2_recovery_passes, >> 818 bch2_recovery_passes_from_stable(le64_to_cpu(passes))); >> 819 } 710 820 711 if (write_sb) !! 821 bch_info(c, "%s", buf.buf); 712 bch2_write_super(c); !! 822 printbuf_exit(&buf); 713 mutex_unlock(&c->sb_lock); !! 823 write_sb = true; >> 824 } >> 825 >> 826 if (check_version_upgrade(c)) >> 827 write_sb = true; >> 828 >> 829 if (write_sb) >> 830 bch2_write_super(c); >> 831 >> 832 c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); >> 833 mutex_unlock(&c->sb_lock); >> 834 } 714 835 715 if (c->opts.fsck && IS_ENABLED(CONFIG_ 836 if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) 716 c->opts.recovery_passes |= BIT !! 837 c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); 717 838 718 if (c->opts.fsck) 839 if (c->opts.fsck) 719 set_bit(BCH_FS_fsck_running, & 840 set_bit(BCH_FS_fsck_running, &c->flags); 720 if (c->sb.clean) << 721 set_bit(BCH_FS_clean_recovery, << 722 841 723 ret = bch2_blacklist_table_initialize( 842 ret = bch2_blacklist_table_initialize(c); 724 if (ret) { 843 if (ret) { 725 bch_err(c, "error initializing 844 bch_err(c, "error initializing blacklist table"); 726 goto err; 845 goto err; 727 } 846 } 728 847 729 bch2_journal_pos_from_member_info_resu !! 848 if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) { 730 << 731 if (!c->sb.clean || c->opts.retain_rec << 732 struct genradix_iter iter; 849 struct genradix_iter iter; 733 struct journal_replay **i; 850 struct journal_replay **i; 734 851 735 bch_verbose(c, "starting journ 852 bch_verbose(c, "starting journal read"); 736 ret = bch2_journal_read(c, &la 853 ret = bch2_journal_read(c, &last_seq, &blacklist_seq, &journal_seq); 737 if (ret) 854 if (ret) 738 goto err; 855 goto err; 739 856 740 /* 857 /* 741 * note: cmd_list_journal need 858 * note: cmd_list_journal needs the blacklist table fully up to date so 742 * it can asterisk ignored jou 859 * it can asterisk ignored journal entries: 743 */ 860 */ 744 if (c->opts.read_journal_only) 861 if (c->opts.read_journal_only) 745 goto out; 862 goto out; 746 863 747 genradix_for_each_reverse(&c-> 864 genradix_for_each_reverse(&c->journal_entries, iter, i) 748 if (!journal_replay_ig !! 865 if (*i && !(*i)->ignore) { 749 last_journal_e 866 last_journal_entry = &(*i)->j; 750 break; 867 break; 751 } 868 } 752 869 753 if (mustfix_fsck_err_on(c->sb. 870 if (mustfix_fsck_err_on(c->sb.clean && 754 last_j 871 last_journal_entry && 755 !journ 872 !journal_entry_empty(last_journal_entry), c, 756 clean_but_jour 873 clean_but_journal_not_empty, 757 "filesystem ma 874 "filesystem marked clean but journal not empty")) { 758 c->sb.compat &= ~(1ULL 875 c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); 759 SET_BCH_SB_CLEAN(c->di 876 SET_BCH_SB_CLEAN(c->disk_sb.sb, false); 760 c->sb.clean = false; 877 c->sb.clean = false; 761 } 878 } 762 879 763 if (!last_journal_entry) { 880 if (!last_journal_entry) { 764 fsck_err_on(!c->sb.cle 881 fsck_err_on(!c->sb.clean, c, 765 dirty_but_ 882 dirty_but_no_journal_entries, 766 "no journa 883 "no journal entries found"); 767 if (clean) 884 if (clean) 768 goto use_clean 885 goto use_clean; 769 886 770 genradix_for_each_reve 887 genradix_for_each_reverse(&c->journal_entries, iter, i) 771 if (*i) { 888 if (*i) { 772 last_j 889 last_journal_entry = &(*i)->j; 773 (*i)-> !! 890 (*i)->ignore = false; 774 (*i)-> << 775 /* 891 /* 776 * Thi 892 * This was probably a NO_FLUSH entry, 777 * so 893 * so last_seq was garbage - but we know 778 * we' 894 * we're only using a single journal 779 * ent 895 * entry, set it here: 780 */ 896 */ 781 (*i)-> 897 (*i)->j.last_seq = (*i)->j.seq; 782 break; 898 break; 783 } 899 } 784 } 900 } 785 901 786 ret = bch2_journal_keys_sort(c 902 ret = bch2_journal_keys_sort(c); 787 if (ret) 903 if (ret) 788 goto err; 904 goto err; 789 905 790 if (c->sb.clean && last_journa 906 if (c->sb.clean && last_journal_entry) { 791 ret = bch2_verify_supe 907 ret = bch2_verify_superblock_clean(c, &clean, 792 908 last_journal_entry); 793 if (ret) 909 if (ret) 794 goto err; 910 goto err; 795 } 911 } 796 } else { 912 } else { 797 use_clean: 913 use_clean: 798 if (!clean) { 914 if (!clean) { 799 bch_err(c, "no superbl 915 bch_err(c, "no superblock clean section found"); 800 ret = -BCH_ERR_fsck_re 916 ret = -BCH_ERR_fsck_repair_impossible; 801 goto err; 917 goto err; 802 918 803 } 919 } 804 blacklist_seq = journal_seq = 920 blacklist_seq = journal_seq = le64_to_cpu(clean->journal_seq) + 1; 805 } 921 } 806 922 807 c->journal_replay_seq_start = last 923 c->journal_replay_seq_start = last_seq; 808 c->journal_replay_seq_end = blac 924 c->journal_replay_seq_end = blacklist_seq - 1; 809 925 810 if (c->opts.reconstruct_alloc) !! 926 if (c->opts.reconstruct_alloc) { 811 bch2_reconstruct_alloc(c); !! 927 c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); >> 928 drop_alloc_keys(&c->journal_keys); >> 929 } 812 930 813 zero_out_btree_mem_ptr(&c->journal_key 931 zero_out_btree_mem_ptr(&c->journal_keys); 814 932 815 ret = journal_replay_early(c, clean); 933 ret = journal_replay_early(c, clean); 816 if (ret) 934 if (ret) 817 goto err; 935 goto err; 818 936 819 /* 937 /* 820 * After an unclean shutdown, skip the 938 * After an unclean shutdown, skip then next few journal sequence 821 * numbers as they may have been refer 939 * numbers as they may have been referenced by btree writes that 822 * happened before their corresponding 940 * happened before their corresponding journal writes - those btree 823 * writes need to be ignored, by skipp 941 * writes need to be ignored, by skipping and blacklisting the next few 824 * journal sequence numbers: 942 * journal sequence numbers: 825 */ 943 */ 826 if (!c->sb.clean) 944 if (!c->sb.clean) 827 journal_seq += 8; 945 journal_seq += 8; 828 946 829 if (blacklist_seq != journal_seq) { 947 if (blacklist_seq != journal_seq) { 830 ret = bch2_journal_log_msg(c 948 ret = bch2_journal_log_msg(c, "blacklisting entries %llu-%llu", 831 b 949 blacklist_seq, journal_seq) ?: 832 bch2_journal_seq_black 950 bch2_journal_seq_blacklist_add(c, 833 blackl 951 blacklist_seq, journal_seq); 834 if (ret) { 952 if (ret) { 835 bch_err_msg(c, ret, "e !! 953 bch_err(c, "error creating new journal seq blacklist entry"); 836 goto err; 954 goto err; 837 } 955 } 838 } 956 } 839 957 840 ret = bch2_journal_log_msg(c, "start 958 ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu", 841 journal_s 959 journal_seq, last_seq, blacklist_seq - 1) ?: 842 bch2_fs_journal_start(&c->jour 960 bch2_fs_journal_start(&c->journal, journal_seq); 843 if (ret) 961 if (ret) 844 goto err; 962 goto err; 845 963 >> 964 if (c->opts.reconstruct_alloc) >> 965 bch2_journal_log_msg(c, "dropping alloc info"); >> 966 846 /* 967 /* 847 * Skip past versions that might have 968 * Skip past versions that might have possibly been used (as nonces), 848 * but hadn't had their pointers writt 969 * but hadn't had their pointers written: 849 */ 970 */ 850 if (c->sb.encryption_type && !c->sb.cl 971 if (c->sb.encryption_type && !c->sb.clean) 851 atomic64_add(1 << 16, &c->key_ 972 atomic64_add(1 << 16, &c->key_version); 852 973 853 ret = read_btree_roots(c); 974 ret = read_btree_roots(c); 854 if (ret) 975 if (ret) 855 goto err; 976 goto err; 856 977 857 set_bit(BCH_FS_btree_running, &c->flag << 858 << 859 ret = bch2_sb_set_upgrade_extra(c); << 860 << 861 ret = bch2_run_recovery_passes(c); 978 ret = bch2_run_recovery_passes(c); 862 if (ret) 979 if (ret) 863 goto err; 980 goto err; 864 981 865 /* << 866 * Normally set by the appropriate rec << 867 * indicates we're in early recovery a << 868 * being applied to the journal replay << 869 * multithreaded use: << 870 */ << 871 set_bit(BCH_FS_may_go_rw, &c->flags); << 872 clear_bit(BCH_FS_fsck_running, &c->fla 982 clear_bit(BCH_FS_fsck_running, &c->flags); 873 983 874 /* in case we don't run journal replay << 875 set_bit(BCH_FS_accounting_replay_done, << 876 << 877 /* fsync if we fixed errors */ << 878 if (test_bit(BCH_FS_errors_fixed, &c-> << 879 bch2_write_ref_tryget(c, BCH_WRITE << 880 bch2_journal_flush_all_pins(&c << 881 bch2_journal_meta(&c->journal) << 882 bch2_write_ref_put(c, BCH_WRIT << 883 } << 884 << 885 /* If we fixed errors, verify that fs 984 /* If we fixed errors, verify that fs is actually clean now: */ 886 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) 985 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && 887 test_bit(BCH_FS_errors_fixed, &c-> 986 test_bit(BCH_FS_errors_fixed, &c->flags) && 888 !test_bit(BCH_FS_errors_not_fixed, 987 !test_bit(BCH_FS_errors_not_fixed, &c->flags) && 889 !test_bit(BCH_FS_error, &c->flags) 988 !test_bit(BCH_FS_error, &c->flags)) { 890 bch2_flush_fsck_errs(c); 989 bch2_flush_fsck_errs(c); 891 990 892 bch_info(c, "Fixed errors, run 991 bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean"); 893 clear_bit(BCH_FS_errors_fixed, 992 clear_bit(BCH_FS_errors_fixed, &c->flags); 894 993 895 c->curr_recovery_pass = BCH_RE 994 c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; 896 995 897 ret = bch2_run_recovery_passes 996 ret = bch2_run_recovery_passes(c); 898 if (ret) 997 if (ret) 899 goto err; 998 goto err; 900 999 901 if (test_bit(BCH_FS_errors_fix 1000 if (test_bit(BCH_FS_errors_fixed, &c->flags) || 902 test_bit(BCH_FS_errors_not 1001 test_bit(BCH_FS_errors_not_fixed, &c->flags)) { 903 bch_err(c, "Second fsc 1002 bch_err(c, "Second fsck run was not clean"); 904 set_bit(BCH_FS_errors_ 1003 set_bit(BCH_FS_errors_not_fixed, &c->flags); 905 } 1004 } 906 1005 907 set_bit(BCH_FS_errors_fixed, & 1006 set_bit(BCH_FS_errors_fixed, &c->flags); 908 } 1007 } 909 1008 910 if (enabled_qtypes(c)) { 1009 if (enabled_qtypes(c)) { 911 bch_verbose(c, "reading quotas 1010 bch_verbose(c, "reading quotas"); 912 ret = bch2_fs_quota_read(c); 1011 ret = bch2_fs_quota_read(c); 913 if (ret) 1012 if (ret) 914 goto err; 1013 goto err; 915 bch_verbose(c, "quotas done"); 1014 bch_verbose(c, "quotas done"); 916 } 1015 } 917 1016 918 mutex_lock(&c->sb_lock); 1017 mutex_lock(&c->sb_lock); 919 ext = bch2_sb_field_get(c->disk_sb.sb, !! 1018 bool write_sb = false; 920 write_sb = false; << 921 1019 922 if (BCH_SB_VERSION_UPGRADE_COMPLETE(c- 1020 if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) { 923 SET_BCH_SB_VERSION_UPGRADE_COM 1021 SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version)); 924 write_sb = true; 1022 write_sb = true; 925 } 1023 } 926 1024 927 if (!test_bit(BCH_FS_error, &c->flags) 1025 if (!test_bit(BCH_FS_error, &c->flags) && 928 !(c->disk_sb.sb->compat[0] & cpu_t 1026 !(c->disk_sb.sb->compat[0] & cpu_to_le64(1ULL << BCH_COMPAT_alloc_info))) { 929 c->disk_sb.sb->compat[0] |= cp 1027 c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info); 930 write_sb = true; 1028 write_sb = true; 931 } 1029 } 932 1030 933 if (!test_bit(BCH_FS_error, &c->flags) !! 1031 if (!test_bit(BCH_FS_error, &c->flags)) { 934 !bch2_is_zero(ext->errors_silent, !! 1032 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); 935 memset(ext->errors_silent, 0, !! 1033 if (ext && 936 write_sb = true; !! 1034 (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) || 937 } !! 1035 !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) { 938 !! 1036 memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required)); 939 if (c->opts.fsck && !! 1037 memset(ext->errors_silent, 0, sizeof(ext->errors_silent)); 940 !test_bit(BCH_FS_error, &c->flags) !! 1038 write_sb = true; 941 c->recovery_pass_done == BCH_RECOV !! 1039 } 942 ext->btrees_lost_data) { << 943 ext->btrees_lost_data = 0; << 944 write_sb = true; << 945 } 1040 } 946 1041 947 if (c->opts.fsck && 1042 if (c->opts.fsck && 948 !test_bit(BCH_FS_error, &c->flags) 1043 !test_bit(BCH_FS_error, &c->flags) && 949 !test_bit(BCH_FS_errors_not_fixed, 1044 !test_bit(BCH_FS_errors_not_fixed, &c->flags)) { 950 SET_BCH_SB_HAS_ERRORS(c->disk_ 1045 SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0); 951 SET_BCH_SB_HAS_TOPOLOGY_ERRORS 1046 SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 0); 952 write_sb = true; 1047 write_sb = true; 953 } 1048 } 954 1049 955 if (bch2_blacklist_entries_gc(c)) << 956 write_sb = true; << 957 << 958 if (write_sb) 1050 if (write_sb) 959 bch2_write_super(c); 1051 bch2_write_super(c); 960 mutex_unlock(&c->sb_lock); 1052 mutex_unlock(&c->sb_lock); 961 1053 962 if (!(c->sb.compat & (1ULL << BCH_COMP 1054 if (!(c->sb.compat & (1ULL << BCH_COMPAT_extents_above_btree_updates_done)) || 963 c->sb.version_min < bcachefs_metad 1055 c->sb.version_min < bcachefs_metadata_version_btree_ptr_sectors_written) { 964 struct bch_move_stats stats; 1056 struct bch_move_stats stats; 965 1057 966 bch2_move_stats_init(&stats, " 1058 bch2_move_stats_init(&stats, "recovery"); 967 1059 968 struct printbuf buf = PRINTBUF 1060 struct printbuf buf = PRINTBUF; 969 bch2_version_to_text(&buf, c-> 1061 bch2_version_to_text(&buf, c->sb.version_min); 970 bch_info(c, "scanning for old 1062 bch_info(c, "scanning for old btree nodes: min_version %s", buf.buf); 971 printbuf_exit(&buf); 1063 printbuf_exit(&buf); 972 1064 973 ret = bch2_fs_read_write_ear 1065 ret = bch2_fs_read_write_early(c) ?: 974 bch2_scan_old_btree_no 1066 bch2_scan_old_btree_nodes(c, &stats); 975 if (ret) 1067 if (ret) 976 goto err; 1068 goto err; 977 bch_info(c, "scanning for old 1069 bch_info(c, "scanning for old btree nodes done"); 978 } 1070 } 979 1071 >> 1072 if (c->journal_seq_blacklist_table && >> 1073 c->journal_seq_blacklist_table->nr > 128) >> 1074 queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work); >> 1075 980 ret = 0; 1076 ret = 0; 981 out: 1077 out: 982 bch2_flush_fsck_errs(c); 1078 bch2_flush_fsck_errs(c); 983 1079 984 if (!c->opts.retain_recovery_info) { !! 1080 if (!c->opts.keep_journal && >> 1081 test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) 985 bch2_journal_keys_put_initial( 1082 bch2_journal_keys_put_initial(c); 986 bch2_find_btree_nodes_exit(&c- !! 1083 kfree(clean); 987 } << 988 if (!IS_ERR(clean)) << 989 kfree(clean); << 990 1084 991 if (!ret && 1085 if (!ret && 992 test_bit(BCH_FS_need_delete_dead_s 1086 test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) && 993 !c->opts.nochanges) { 1087 !c->opts.nochanges) { 994 bch2_fs_read_write_early(c); 1088 bch2_fs_read_write_early(c); 995 bch2_delete_dead_snapshots_asy 1089 bch2_delete_dead_snapshots_async(c); 996 } 1090 } 997 1091 998 bch_err_fn(c, ret); 1092 bch_err_fn(c, ret); 999 return ret; 1093 return ret; 1000 err: 1094 err: 1001 fsck_err: 1095 fsck_err: 1002 bch2_fs_emergency_read_only(c); 1096 bch2_fs_emergency_read_only(c); 1003 goto out; 1097 goto out; 1004 } 1098 } 1005 1099 1006 int bch2_fs_initialize(struct bch_fs *c) 1100 int bch2_fs_initialize(struct bch_fs *c) 1007 { 1101 { 1008 struct bch_inode_unpacked root_inode, 1102 struct bch_inode_unpacked root_inode, lostfound_inode; 1009 struct bkey_inode_buf packed_inode; 1103 struct bkey_inode_buf packed_inode; 1010 struct qstr lostfound = QSTR("lost+fo 1104 struct qstr lostfound = QSTR("lost+found"); 1011 struct bch_member *m; << 1012 int ret; 1105 int ret; 1013 1106 1014 bch_notice(c, "initializing new files 1107 bch_notice(c, "initializing new filesystem"); 1015 set_bit(BCH_FS_new_fs, &c->flags); << 1016 1108 1017 mutex_lock(&c->sb_lock); 1109 mutex_lock(&c->sb_lock); 1018 c->disk_sb.sb->compat[0] |= cpu_to_le 1110 c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); 1019 c->disk_sb.sb->compat[0] |= cpu_to_le 1111 c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); 1020 1112 1021 bch2_check_version_downgrade(c); 1113 bch2_check_version_downgrade(c); 1022 1114 1023 if (c->opts.version_upgrade != BCH_VE 1115 if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { 1024 bch2_sb_upgrade(c, bcachefs_m 1116 bch2_sb_upgrade(c, bcachefs_metadata_version_current); 1025 SET_BCH_SB_VERSION_UPGRADE_CO 1117 SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); 1026 bch2_write_super(c); 1118 bch2_write_super(c); 1027 } 1119 } 1028 << 1029 for_each_member_device(c, ca) { << 1030 m = bch2_members_v2_get_mut(c << 1031 SET_BCH_MEMBER_FREESPACE_INIT << 1032 ca->mi = bch2_mi_to_cpu(m); << 1033 } << 1034 << 1035 bch2_write_super(c); << 1036 mutex_unlock(&c->sb_lock); 1120 mutex_unlock(&c->sb_lock); 1037 1121 1038 c->curr_recovery_pass = BCH_RECOVERY_ !! 1122 c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns); 1039 set_bit(BCH_FS_btree_running, &c->fla << 1040 set_bit(BCH_FS_may_go_rw, &c->flags); 1123 set_bit(BCH_FS_may_go_rw, &c->flags); 1041 1124 1042 for (unsigned i = 0; i < BTREE_ID_NR; 1125 for (unsigned i = 0; i < BTREE_ID_NR; i++) 1043 bch2_btree_root_alloc_fake(c, !! 1126 bch2_btree_root_alloc(c, i); >> 1127 >> 1128 for_each_member_device(c, ca) >> 1129 bch2_dev_usage_init(ca); 1044 1130 1045 ret = bch2_fs_journal_alloc(c); 1131 ret = bch2_fs_journal_alloc(c); 1046 if (ret) 1132 if (ret) 1047 goto err; 1133 goto err; 1048 1134 1049 /* 1135 /* 1050 * journal_res_get() will crash if ca 1136 * journal_res_get() will crash if called before this has 1051 * set up the journal.pin FIFO and jo 1137 * set up the journal.pin FIFO and journal.cur pointer: 1052 */ 1138 */ 1053 bch2_fs_journal_start(&c->journal, 1) 1139 bch2_fs_journal_start(&c->journal, 1); 1054 set_bit(BCH_FS_accounting_replay_done << 1055 bch2_journal_set_replay_done(&c->jour 1140 bch2_journal_set_replay_done(&c->journal); 1056 1141 1057 ret = bch2_fs_read_write_early(c); 1142 ret = bch2_fs_read_write_early(c); 1058 if (ret) 1143 if (ret) 1059 goto err; 1144 goto err; 1060 1145 1061 for_each_member_device(c, ca) { << 1062 ret = bch2_dev_usage_init(ca, << 1063 if (ret) { << 1064 bch2_dev_put(ca); << 1065 goto err; << 1066 } << 1067 } << 1068 << 1069 /* 1146 /* 1070 * Write out the superblock and journ 1147 * Write out the superblock and journal buckets, now that we can do 1071 * btree updates 1148 * btree updates 1072 */ 1149 */ 1073 bch_verbose(c, "marking superblocks") 1150 bch_verbose(c, "marking superblocks"); 1074 ret = bch2_trans_mark_dev_sbs(c); 1151 ret = bch2_trans_mark_dev_sbs(c); 1075 bch_err_msg(c, ret, "marking superblo 1152 bch_err_msg(c, ret, "marking superblocks"); 1076 if (ret) 1153 if (ret) 1077 goto err; 1154 goto err; 1078 1155 1079 for_each_online_member(c, ca) 1156 for_each_online_member(c, ca) 1080 ca->new_fs_bucket_idx = 0; 1157 ca->new_fs_bucket_idx = 0; 1081 1158 1082 ret = bch2_fs_freespace_init(c); 1159 ret = bch2_fs_freespace_init(c); 1083 if (ret) 1160 if (ret) 1084 goto err; 1161 goto err; 1085 1162 1086 ret = bch2_initialize_subvolumes(c); 1163 ret = bch2_initialize_subvolumes(c); 1087 if (ret) 1164 if (ret) 1088 goto err; 1165 goto err; 1089 1166 1090 bch_verbose(c, "reading snapshots tab 1167 bch_verbose(c, "reading snapshots table"); 1091 ret = bch2_snapshots_read(c); 1168 ret = bch2_snapshots_read(c); 1092 if (ret) 1169 if (ret) 1093 goto err; 1170 goto err; 1094 bch_verbose(c, "reading snapshots don 1171 bch_verbose(c, "reading snapshots done"); 1095 1172 1096 bch2_inode_init(c, &root_inode, 0, 0, 1173 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL); 1097 root_inode.bi_inum = BCACHEFS_RO 1174 root_inode.bi_inum = BCACHEFS_ROOT_INO; 1098 root_inode.bi_subvol = BCACHEFS_RO 1175 root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL; 1099 bch2_inode_pack(&packed_inode, &root_ 1176 bch2_inode_pack(&packed_inode, &root_inode); 1100 packed_inode.inode.k.p.snapshot = U32 1177 packed_inode.inode.k.p.snapshot = U32_MAX; 1101 1178 1102 ret = bch2_btree_insert(c, BTREE_ID_i !! 1179 ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0); 1103 bch_err_msg(c, ret, "creating root di 1180 bch_err_msg(c, ret, "creating root directory"); 1104 if (ret) 1181 if (ret) 1105 goto err; 1182 goto err; 1106 1183 1107 bch2_inode_init_early(c, &lostfound_i 1184 bch2_inode_init_early(c, &lostfound_inode); 1108 1185 1109 ret = bch2_trans_commit_do(c, NULL, N !! 1186 ret = bch2_trans_do(c, NULL, NULL, 0, 1110 bch2_create_trans(trans, 1187 bch2_create_trans(trans, 1111 BCACHEFS_RO 1188 BCACHEFS_ROOT_SUBVOL_INUM, 1112 &root_inode 1189 &root_inode, &lostfound_inode, 1113 &lostfound, 1190 &lostfound, 1114 0, 0, S_IFD 1191 0, 0, S_IFDIR|0700, 0, 1115 NULL, NULL, 1192 NULL, NULL, (subvol_inum) { 0 }, 0)); 1116 bch_err_msg(c, ret, "creating lost+fo 1193 bch_err_msg(c, ret, "creating lost+found"); 1117 if (ret) 1194 if (ret) 1118 goto err; 1195 goto err; 1119 1196 1120 c->recovery_pass_done = BCH_RECOVERY_ !! 1197 c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1; 1121 1198 1122 if (enabled_qtypes(c)) { 1199 if (enabled_qtypes(c)) { 1123 ret = bch2_fs_quota_read(c); 1200 ret = bch2_fs_quota_read(c); 1124 if (ret) 1201 if (ret) 1125 goto err; 1202 goto err; 1126 } 1203 } 1127 1204 1128 ret = bch2_journal_flush(&c->journal) 1205 ret = bch2_journal_flush(&c->journal); 1129 bch_err_msg(c, ret, "writing first jo 1206 bch_err_msg(c, ret, "writing first journal entry"); 1130 if (ret) 1207 if (ret) 1131 goto err; 1208 goto err; 1132 1209 1133 mutex_lock(&c->sb_lock); 1210 mutex_lock(&c->sb_lock); 1134 SET_BCH_SB_INITIALIZED(c->disk_sb.sb, 1211 SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); 1135 SET_BCH_SB_CLEAN(c->disk_sb.sb, false 1212 SET_BCH_SB_CLEAN(c->disk_sb.sb, false); 1136 1213 1137 bch2_write_super(c); 1214 bch2_write_super(c); 1138 mutex_unlock(&c->sb_lock); 1215 mutex_unlock(&c->sb_lock); 1139 1216 1140 return 0; 1217 return 0; 1141 err: 1218 err: 1142 bch_err_fn(c, ret); 1219 bch_err_fn(c, ret); 1143 return ret; 1220 return ret; 1144 } 1221 } 1145 1222
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.