Linux/fs/bcachefs/backpointers.c (linux-6.11-rc3)

// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bbpos.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "checksum.h"
#include "error.h"

#include <linux/mm.h>

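/*
 * Check whether @k has a non-cached pointer into @bucket whose generated
 * backpointer matches @bp:
 */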
static bool extent_matches_bp(struct bch_fs *c,
                              enum btree_id btree_id, unsigned level,
                              struct bkey_s_c k,
                              struct bpos bucket,
                              struct bch_backpointer bp)
{
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;

        rcu_read_lock();
        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                struct bpos bucket2;
                struct bch_backpointer bp2;

                if (p.ptr.cached)
                        continue;

                struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
                if (!ca)
                        continue;

                bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2);
                if (bpos_eq(bucket, bucket2) &&
                    !memcmp(&bp, &bp2, sizeof(bp))) {
                        rcu_read_unlock();
                        return true;
                }
        }
        rcu_read_unlock();

        return false;
}

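/*
 * Validate a backpointer key: check that its position encodes a valid bucket
 * and offset on the device it refers to (backpointers for missing devices are
 * left for fsck):
 */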
int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
                             enum bch_validate_flags flags,
                             struct printbuf *err)
{
        struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

        rcu_read_lock();
        struct bch_dev *ca = bch2_dev_rcu(c, bp.k->p.inode);
        if (!ca) {
                /* these will be caught by fsck */
                rcu_read_unlock();
                return 0;
        }

        struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p);
        struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset);
        rcu_read_unlock();
        int ret = 0;

        bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
                         !bpos_eq(bp.k->p, bp_pos),
                         c, err,
                         backpointer_bucket_offset_wrong,
                         "backpointer bucket_offset wrong");
fsck_err:
        return ret;
}

void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
{
        prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
               bch2_btree_id_str(bp->btree_id),
               bp->level,
               (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
               (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
               bp->bucket_len);
        bch2_bpos_to_text(out, bp->pos);
}

void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
        rcu_read_lock();
        struct bch_dev *ca = bch2_dev_rcu(c, k.k->p.inode);
        if (ca) {
                struct bpos bucket = bp_pos_to_bucket(ca, k.k->p);
                rcu_read_unlock();
                prt_str(out, "bucket=");
                bch2_bpos_to_text(out, bucket);
                prt_str(out, " ");
        } else {
                rcu_read_unlock();
        }

        bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
}

void bch2_backpointer_swab(struct bkey_s k)
{
        struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);

        bp.v->bucket_offset     = swab40(bp.v->bucket_offset);
        bp.v->bucket_len        = swab32(bp.v->bucket_len);
        bch2_bpos_swab(&bp.v->pos);
}

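/*
 * Report an inconsistency hit while modifying a backpointer: an existing
 * entry found on insert, or a missing/mismatched entry on delete. Only
 * returns an error once the extents_to_backpointers recovery pass has
 * already run:
 */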
static noinline int backpointer_mod_err(struct btree_trans *trans,
                                        struct bch_backpointer bp,
                                        struct bkey_s_c bp_k,
                                        struct bkey_s_c orig_k,
                                        bool insert)
{
        struct bch_fs *c = trans->c;
        struct printbuf buf = PRINTBUF;

        if (insert) {
                prt_printf(&buf, "existing backpointer found when inserting ");
                bch2_backpointer_to_text(&buf, &bp);
                prt_newline(&buf);
                printbuf_indent_add(&buf, 2);

                prt_printf(&buf, "found ");
                bch2_bkey_val_to_text(&buf, c, bp_k);
                prt_newline(&buf);

                prt_printf(&buf, "for ");
                bch2_bkey_val_to_text(&buf, c, orig_k);

                bch_err(c, "%s", buf.buf);
        } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
                prt_printf(&buf, "backpointer not found when deleting\n");
                printbuf_indent_add(&buf, 2);

                prt_printf(&buf, "searching for ");
                bch2_backpointer_to_text(&buf, &bp);
                prt_newline(&buf);

                prt_printf(&buf, "got ");
                bch2_bkey_val_to_text(&buf, c, bp_k);
                prt_newline(&buf);

                prt_printf(&buf, "for ");
                bch2_bkey_val_to_text(&buf, c, orig_k);

                bch_err(c, "%s", buf.buf);
        }

        printbuf_exit(&buf);

        if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
                return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
        } else {
                return 0;
        }
}

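/*
 * Insert or delete a backpointer with a regular btree update rather than via
 * the btree write buffer, checking the key currently at that position for
 * consistency first:
 */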
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
                                struct bch_dev *ca,
                                struct bpos bucket,
                                struct bch_backpointer bp,
                                struct bkey_s_c orig_k,
                                bool insert)
{
        struct btree_iter bp_iter;
        struct bkey_s_c k;
        struct bkey_i_backpointer *bp_k;
        int ret;

        bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
        ret = PTR_ERR_OR_ZERO(bp_k);
        if (ret)
                return ret;

        bkey_backpointer_init(&bp_k->k_i);
        bp_k->k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset);
        bp_k->v = bp;

        if (!insert) {
                bp_k->k.type = KEY_TYPE_deleted;
                set_bkey_val_u64s(&bp_k->k, 0);
        }

        k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
                               bp_k->k.p,
                               BTREE_ITER_intent|
                               BTREE_ITER_slots|
                               BTREE_ITER_with_updates);
        ret = bkey_err(k);
        if (ret)
                goto err;

        if (insert
            ? k.k->type
            : (k.k->type != KEY_TYPE_backpointer ||
               memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
                ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
                if (ret)
                        goto err;
        }

        ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
        bch2_trans_iter_exit(trans, &bp_iter);
        return ret;
}

/*
 * Find the next backpointer >= *bp_pos:
 */
int bch2_get_next_backpointer(struct btree_trans *trans,
                              struct bch_dev *ca,
                              struct bpos bucket, int gen,
                              struct bpos *bp_pos,
                              struct bch_backpointer *bp,
                              unsigned iter_flags)
{
        struct bpos bp_end_pos = bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0);
        struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
        struct bkey_s_c k;
        int ret = 0;

        if (bpos_ge(*bp_pos, bp_end_pos))
                goto done;

        if (gen >= 0) {
                k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
                                       bucket, BTREE_ITER_cached|iter_flags);
                ret = bkey_err(k);
                if (ret)
                        goto out;

                if (k.k->type != KEY_TYPE_alloc_v4 ||
                    bkey_s_c_to_alloc_v4(k).v->gen != gen)
                        goto done;
        }

        *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(ca, bucket, 0));

        for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
                                     *bp_pos, iter_flags, k, ret) {
                if (bpos_ge(k.k->p, bp_end_pos))
                        break;

                *bp_pos = k.k->p;
                *bp = *bkey_s_c_to_backpointer(k).v;
                goto out;
        }
done:
        *bp_pos = SPOS_MAX;
out:
        bch2_trans_iter_exit(trans, &bp_iter);
        bch2_trans_iter_exit(trans, &alloc_iter);
        return ret;
}

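/*
 * Report a backpointer whose target couldn't be found; harmless when the
 * btree write buffer is in use (the backpointer may simply have been deleted
 * already), an inconsistency otherwise:
 */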
static void backpointer_not_found(struct btree_trans *trans,
                                  struct bpos bp_pos,
                                  struct bch_backpointer bp,
                                  struct bkey_s_c k)
{
        struct bch_fs *c = trans->c;
        struct printbuf buf = PRINTBUF;

        /*
         * If we're using the btree write buffer, the backpointer we were
         * looking at may have already been deleted - failure to find what it
         * pointed to is not an error:
         */
        if (likely(!bch2_backpointers_no_use_write_buffer))
                return;

        struct bpos bucket;
        if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket))
                return;

        prt_printf(&buf, "backpointer doesn't match %s it points to:\n  ",
                   bp.level ? "btree node" : "extent");
        prt_printf(&buf, "bucket: ");
        bch2_bpos_to_text(&buf, bucket);
        prt_printf(&buf, "\n  ");

        prt_printf(&buf, "backpointer pos: ");
        bch2_bpos_to_text(&buf, bp_pos);
        prt_printf(&buf, "\n  ");

        bch2_backpointer_to_text(&buf, &bp);
        prt_printf(&buf, "\n  ");
        bch2_bkey_val_to_text(&buf, c, k);
        if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
                bch_err_ratelimited(c, "%s", buf.buf);
        else
                bch2_trans_inconsistent(trans, "%s", buf.buf);

        printbuf_exit(&buf);
}

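/*
 * Return the extent @bp points to (or, for an interior node backpointer, the
 * btree node's key), or bkey_s_c_null if the target no longer matches the
 * backpointer:
 */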
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
                                         struct btree_iter *iter,
                                         struct bpos bp_pos,
                                         struct bch_backpointer bp,
                                         unsigned iter_flags)
{
        if (likely(!bp.level)) {
                struct bch_fs *c = trans->c;

                struct bpos bucket;
                if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket))
                        return bkey_s_c_err(-EIO);

                bch2_trans_node_iter_init(trans, iter,
                                          bp.btree_id,
                                          bp.pos,
                                          0, 0,
                                          iter_flags);
                struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
                if (bkey_err(k)) {
                        bch2_trans_iter_exit(trans, iter);
                        return k;
                }

                if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
                        return k;

                bch2_trans_iter_exit(trans, iter);
                backpointer_not_found(trans, bp_pos, bp, k);
                return bkey_s_c_null;
        } else {
                struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);

                if (IS_ERR_OR_NULL(b)) {
                        bch2_trans_iter_exit(trans, iter);
                        return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
                }
                return bkey_i_to_s_c(&b->key);
        }
}

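/*
 * Return the btree node that an interior node backpointer points to; returns
 * NULL or an error pointer if the node no longer matches the backpointer:
 */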
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
                                        struct btree_iter *iter,
                                        struct bpos bp_pos,
                                        struct bch_backpointer bp)
{
        struct bch_fs *c = trans->c;

        BUG_ON(!bp.level);

        struct bpos bucket;
        if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket))
                return ERR_PTR(-EIO);

        bch2_trans_node_iter_init(trans, iter,
                                  bp.btree_id,
                                  bp.pos,
                                  0,
                                  bp.level - 1,
                                  0);
        struct btree *b = bch2_btree_iter_peek_node(iter);
        if (IS_ERR_OR_NULL(b))
                goto err;

        BUG_ON(b->c.level != bp.level - 1);

        if (extent_matches_bp(c, bp.btree_id, bp.level,
                              bkey_i_to_s_c(&b->key),
                              bucket, bp))
                return b;

        if (btree_node_will_make_reachable(b)) {
                b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
        } else {
                backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
                b = NULL;
        }
err:
        bch2_trans_iter_exit(trans, iter);
        return b;
}

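/*
 * Check a single backpointer key, deleting it if it points to a nonexistent
 * device or to a bucket with no alloc key:
 */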
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
                                        struct bkey_s_c k)
{
        struct bch_fs *c = trans->c;
        struct btree_iter alloc_iter = { NULL };
        struct bkey_s_c alloc_k;
        struct printbuf buf = PRINTBUF;
        int ret = 0;

        struct bpos bucket;
        if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) {
                if (fsck_err(trans, backpointer_to_missing_device,
                             "backpointer for missing device:\n%s",
                             (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                        ret = bch2_btree_delete_at(trans, bp_iter, 0);
                goto out;
        }

        alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0);
        ret = bkey_err(alloc_k);
        if (ret)
                goto out;

        if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4,
                        trans, backpointer_to_missing_alloc,
                        "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
                        alloc_iter.pos.inode, alloc_iter.pos.offset,
                        (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                ret = bch2_btree_delete_at(trans, bp_iter, 0);
                goto out;
        }
out:
fsck_err:
        bch2_trans_iter_exit(trans, &alloc_iter);
        printbuf_exit(&buf);
        return ret;
}

/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
        int ret = bch2_trans_run(c,
                for_each_btree_key_commit(trans, iter,
                        BTREE_ID_backpointers, POS_MIN, 0, k,
                        NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
                  bch2_check_btree_backpointer(trans, &iter, k)));
        bch_err_fn(c, ret);
        return ret;
}

struct extents_to_bp_state {
        struct bpos     bucket_start;
        struct bpos     bucket_end;
        struct bkey_buf last_flushed;
};

static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
                               struct bkey_s_c extent, unsigned dev)
{
        struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
        int ret = PTR_ERR_OR_ZERO(n);
        if (ret)
                return ret;

        bch2_bkey_drop_device(bkey_i_to_s(n), dev);
        return bch2_btree_insert_trans(trans, btree, n, 0);
}

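/*
 * Two extents claim the same space on @dev: read back @extent's data and
 * verify its checksum; returns 1 if the checksum was bad and the pointer to
 * @dev was dropped, 0 otherwise:
 */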
static int check_extent_checksum(struct btree_trans *trans,
                                 enum btree_id btree, struct bkey_s_c extent,
                                 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
        struct bch_fs *c = trans->c;
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
        struct printbuf buf = PRINTBUF;
        void *data_buf = NULL;
        struct bio *bio = NULL;
        size_t bytes;
        int ret = 0;

        if (bkey_is_btree_ptr(extent.k))
                return false;

        bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
                if (p.ptr.dev == dev)
                        goto found;
        BUG();
found:
        if (!p.crc.csum_type)
                return false;

        bytes = p.crc.compressed_size << 9;

        struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ);
        if (!ca)
                return false;

        data_buf = kvmalloc(bytes, GFP_KERNEL);
        if (!data_buf) {
                ret = -ENOMEM;
                goto err;
        }

        bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
        bio->bi_iter.bi_sector = p.ptr.offset;
        bch2_bio_map(bio, data_buf, bytes);
        ret = submit_bio_wait(bio);
        if (ret)
                goto err;

        prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
        prt_printf(&buf, "\n  %s ", bch2_btree_id_str(btree));
        bch2_bkey_val_to_text(&buf, c, extent);
        prt_printf(&buf, "\n  %s ", bch2_btree_id_str(o_btree));
        bch2_bkey_val_to_text(&buf, c, extent2);

        struct nonce nonce = extent_nonce(extent.k->version, p.crc);
        struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
        if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
                        trans, dup_backpointer_to_bad_csum_extent,
                        "%s", buf.buf))
                ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
        if (bio)
                bio_put(bio);
        kvfree(data_buf);
        percpu_ref_put(&ca->io_ref);
        printbuf_exit(&buf);
        return ret;
}

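/*
 * Check that the backpointer for @orig_k's pointer into @bucket exists: if
 * the slot holds a backpointer for a different extent, work out which of the
 * two extents is bogus; if the backpointer is genuinely missing, recreate it:
 */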
static int check_bp_exists(struct btree_trans *trans,
                           struct extents_to_bp_state *s,
                           struct bpos bucket,
                           struct bch_backpointer bp,
                           struct bkey_s_c orig_k)
{
        struct bch_fs *c = trans->c;
        struct btree_iter bp_iter = {};
        struct btree_iter other_extent_iter = {};
        struct printbuf buf = PRINTBUF;
        struct bkey_s_c bp_k;
        int ret = 0;

        struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket);
        if (!ca) {
                prt_str(&buf, "extent for nonexistent device:bucket ");
                bch2_bpos_to_text(&buf, bucket);
                prt_str(&buf, "\n  ");
                bch2_bkey_val_to_text(&buf, c, orig_k);
                bch_err(c, "%s", buf.buf);
                ret = -BCH_ERR_fsck_repair_unimplemented;
                goto err;
        }

        if (bpos_lt(bucket, s->bucket_start) ||
            bpos_gt(bucket, s->bucket_end))
                goto out;

        bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
                                  bucket_pos_to_bp(ca, bucket, bp.bucket_offset),
                                  0);
        ret = bkey_err(bp_k);
        if (ret)
                goto err;

        if (bp_k.k->type != KEY_TYPE_backpointer ||
            memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
                ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
                if (ret)
                        goto err;

                goto check_existing_bp;
        }
out:
err:
fsck_err:
        bch2_trans_iter_exit(trans, &other_extent_iter);
        bch2_trans_iter_exit(trans, &bp_iter);
        bch2_dev_put(ca);
        printbuf_exit(&buf);
        return ret;
check_existing_bp:
        /* Do we have a backpointer for a different extent? */
        if (bp_k.k->type != KEY_TYPE_backpointer)
                goto missing;

        struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;

        struct bkey_s_c other_extent =
                bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
        ret = bkey_err(other_extent);
        if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
                ret = 0;
        if (ret)
                goto err;

        if (!other_extent.k)
                goto missing;

        if (bch2_extents_match(orig_k, other_extent)) {
                printbuf_reset(&buf);
                prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n  ");
                bch2_bkey_val_to_text(&buf, c, orig_k);
                prt_str(&buf, "\n  ");
                bch2_bkey_val_to_text(&buf, c, other_extent);
                bch_err(c, "%s", buf.buf);

                if (other_extent.k->size <= orig_k.k->size) {
                        ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
                        if (ret)
                                goto err;
                        goto out;
                } else {
                        ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
                        if (ret)
                                goto err;
                        goto missing;
                }
        }

        ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
        if (ret < 0)
                goto err;
        if (ret) {
                ret = 0;
                goto missing;
        }

        ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
        if (ret < 0)
                goto err;
        if (ret) {
                ret = 0;
                goto out;
        }

        printbuf_reset(&buf);
        prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n  ", bucket.inode);
        bch2_bkey_val_to_text(&buf, c, orig_k);
        prt_str(&buf, "\n  ");
        bch2_bkey_val_to_text(&buf, c, other_extent);
        bch_err(c, "%s", buf.buf);
        ret = -BCH_ERR_fsck_repair_unimplemented;
        goto err;
missing:
        printbuf_reset(&buf);
        prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
               bch2_btree_id_str(bp.btree_id), bp.level);
        bch2_bkey_val_to_text(&buf, c, orig_k);
        prt_printf(&buf, "\n  got:   ");
        bch2_bkey_val_to_text(&buf, c, bp_k);

        struct bkey_i_backpointer n_bp_k;
        bkey_backpointer_init(&n_bp_k.k_i);
        n_bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset);
        n_bp_k.v = bp;
        prt_printf(&buf, "\n  want:  ");
        bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));

        if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf))
                ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, orig_k, true);

        goto out;
}

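/*
 * Check that every non-cached pointer in @k has a matching backpointer:
 */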
static int check_extent_to_backpointers(struct btree_trans *trans,
                                        struct extents_to_bp_state *s,
                                        enum btree_id btree, unsigned level,
                                        struct bkey_s_c k)
{
        struct bch_fs *c = trans->c;
        struct bkey_ptrs_c ptrs;
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
        int ret;

        ptrs = bch2_bkey_ptrs_c(k);
        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                struct bpos bucket_pos = POS_MIN;
                struct bch_backpointer bp;

                if (p.ptr.cached)
                        continue;

                rcu_read_lock();
                struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
                if (ca)
                        bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp);
                rcu_read_unlock();

                if (!ca)
                        continue;

                ret = check_bp_exists(trans, s, bucket_pos, bp, k);
                if (ret)
                        return ret;
        }

        return 0;
}

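/*
 * Check the backpointer for a btree root, retrying if the root changed
 * underneath us; also reports the root's level back to the caller:
 */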
static int check_btree_root_to_backpointers(struct btree_trans *trans,
                                            struct extents_to_bp_state *s,
                                            enum btree_id btree_id,
                                            int *level)
{
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
        struct btree *b;
        struct bkey_s_c k;
        int ret;
retry:
        bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
                                  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
        b = bch2_btree_iter_peek_node(&iter);
        ret = PTR_ERR_OR_ZERO(b);
        if (ret)
                goto err;

        if (b != btree_node_root(c, b)) {
                bch2_trans_iter_exit(trans, &iter);
                goto retry;
        }

        *level = b->c.level;

        k = bkey_i_to_s_c(&b->key);
        ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
err:
        bch2_trans_iter_exit(trans, &iter);
        return ret;
}

static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
{
        return (struct bbpos) {
                .btree  = bp.btree_id,
                .pos    = bp.pos,
        };
}

static u64 mem_may_pin_bytes(struct bch_fs *c)
{
        struct sysinfo i;
        si_meminfo(&i);

        u64 mem_bytes = i.totalram * i.mem_unit;
        return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
}

static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
{
        return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
}

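/*
 * Mark a range of btree nodes, starting at @start, to be pinned in the btree
 * node cache, sized to the fsck memory budget; *end is set to where the
 * budget ran out, or BBPOS_MAX if everything fit:
 */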
static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
                                        u64 btree_leaf_mask,
                                        u64 btree_interior_mask,
                                        struct bbpos start, struct bbpos *end)
{
        struct bch_fs *c = trans->c;
        s64 mem_may_pin = mem_may_pin_bytes(c);
        int ret = 0;

        btree_interior_mask |= btree_leaf_mask;

        c->btree_cache.pinned_nodes_leaf_mask           = btree_leaf_mask;
        c->btree_cache.pinned_nodes_interior_mask       = btree_interior_mask;
        c->btree_cache.pinned_nodes_start               = start;
        c->btree_cache.pinned_nodes_end                 = *end = BBPOS_MAX;

        for (enum btree_id btree = start.btree;
             btree < BTREE_ID_NR && !ret;
             btree++) {
                unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1;
                struct btree_iter iter;
                struct btree *b;

                if (!(BIT_ULL(btree) & btree_leaf_mask) &&
                    !(BIT_ULL(btree) & btree_interior_mask))
                        continue;

                bch2_trans_begin(trans);

                __for_each_btree_node(trans, iter, btree,
                                      btree == start.btree ? start.pos : POS_MIN,
                                      0, depth, BTREE_ITER_prefetch, b, ret) {
                        mem_may_pin -= btree_buf_bytes(b);
                        if (mem_may_pin <= 0) {
                                c->btree_cache.pinned_nodes_end = *end =
                                        BBPOS(btree, b->key.k.p);
                                bch2_trans_iter_exit(trans, &iter);
                                return 0;
                        }
                }
                bch2_trans_iter_exit(trans, &iter);
        }

        return ret;
}

static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
                                                   struct extents_to_bp_state *s)
{
        struct bch_fs *c = trans->c;
        int ret = 0;

        for (enum btree_id btree_id = 0;
             btree_id < btree_id_nr_alive(c);
             btree_id++) {
                int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

                ret = commit_do(trans, NULL, NULL,
                                BCH_TRANS_COMMIT_no_enospc,
                                check_btree_root_to_backpointers(trans, s, btree_id, &level));
                if (ret)
                        return ret;

                while (level >= depth) {
                        struct btree_iter iter;
                        bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level,
                                                  BTREE_ITER_prefetch);

                        ret = for_each_btree_key_continue(trans, iter, 0, k, ({
                                check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
                                bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
                        }));
                        if (ret)
                                return ret;

                        --level;
                }
        }

        return 0;
}

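/*
 * Check extents -> backpointers: walk every btree with pointers and verify
 * that each pointer has a backpointer, running in multiple passes over bucket
 * ranges when the backpointers btree doesn't fit in the memory budget:
 */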
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
        struct btree_trans *trans = bch2_trans_get(c);
        struct extents_to_bp_state s = { .bucket_start = POS_MIN };
        int ret;

        bch2_bkey_buf_init(&s.last_flushed);
        bkey_init(&s.last_flushed.k->k);

        while (1) {
                struct bbpos end;
                ret = bch2_get_btree_in_memory_pos(trans,
                                BIT_ULL(BTREE_ID_backpointers),
                                BIT_ULL(BTREE_ID_backpointers),
                                BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
                if (ret)
                        break;

                s.bucket_end = end.pos;

                if ( bpos_eq(s.bucket_start, POS_MIN) &&
                    !bpos_eq(s.bucket_end, SPOS_MAX))
                        bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
                                    __func__, btree_nodes_fit_in_ram(c));

                if (!bpos_eq(s.bucket_start, POS_MIN) ||
                    !bpos_eq(s.bucket_end, SPOS_MAX)) {
                        struct printbuf buf = PRINTBUF;

                        prt_str(&buf, "check_extents_to_backpointers(): ");
                        bch2_bpos_to_text(&buf, s.bucket_start);
                        prt_str(&buf, "-");
                        bch2_bpos_to_text(&buf, s.bucket_end);

                        bch_verbose(c, "%s", buf.buf);
                        printbuf_exit(&buf);
                }

                ret = bch2_check_extents_to_backpointers_pass(trans, &s);
                if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
                        break;

                s.bucket_start = bpos_successor(s.bucket_end);
        }
        bch2_trans_put(trans);
        bch2_bkey_buf_exit(&s.last_flushed, c);

        c->btree_cache.pinned_nodes_leaf_mask = 0;
        c->btree_cache.pinned_nodes_interior_mask = 0;

        bch_err_fn(c, ret);
        return ret;
}

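/*
 * Check a single backpointer within [start, end]: verify that the extent or
 * btree node it points to still exists, deleting the backpointer if not:
 */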
static int check_one_backpointer(struct btree_trans *trans,
                                 struct bbpos start,
                                 struct bbpos end,
                                 struct bkey_s_c_backpointer bp,
                                 struct bkey_buf *last_flushed)
{
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
        struct bbpos pos = bp_to_bbpos(*bp.v);
        struct bkey_s_c k;
        struct printbuf buf = PRINTBUF;
        int ret;

        if (bbpos_cmp(pos, start) < 0 ||
            bbpos_cmp(pos, end) > 0)
                return 0;

        k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
        ret = bkey_err(k);
        if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
                return 0;
        if (ret)
                return ret;

        if (!k.k) {
                ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed);
                if (ret)
                        goto out;

                if (fsck_err(trans, backpointer_to_missing_ptr,
                             "backpointer for missing %s\n  %s",
                             bp.v->level ? "btree node" : "extent",
                             (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
                        ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
                        goto out;
                }
        }
out:
fsck_err:
        bch2_trans_iter_exit(trans, &iter);
        printbuf_exit(&buf);
        return ret;
}

static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
                                                   struct bbpos start,
                                                   struct bbpos end)
{
        struct bkey_buf last_flushed;

        bch2_bkey_buf_init(&last_flushed);
        bkey_init(&last_flushed.k->k);

        int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
                                  POS_MIN, BTREE_ITER_prefetch, k,
                                  NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
                check_one_backpointer(trans, start, end,
                                      bkey_s_c_to_backpointer(k),
                                      &last_flushed));

        bch2_bkey_buf_exit(&last_flushed, trans->c);
        return ret;
}

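/*
 * Check backpointers -> extents: verify that every backpointer points to an
 * existing extent or btree node, running in multiple passes when the extents
 * and reflink btrees don't fit in the memory budget:
 */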
int bch2_check_backpointers_to_extents(struct bch_fs *c)
{
        struct btree_trans *trans = bch2_trans_get(c);
        struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
        int ret;

        while (1) {
                ret = bch2_get_btree_in_memory_pos(trans,
                                                   BIT_ULL(BTREE_ID_extents)|
                                                   BIT_ULL(BTREE_ID_reflink),
                                                   ~0,
                                                   start, &end);
                if (ret)
                        break;

                if (!bbpos_cmp(start, BBPOS_MIN) &&
                    bbpos_cmp(end, BBPOS_MAX))
                        bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
                                    __func__, btree_nodes_fit_in_ram(c));

                if (bbpos_cmp(start, BBPOS_MIN) ||
                    bbpos_cmp(end, BBPOS_MAX)) {
                        struct printbuf buf = PRINTBUF;

                        prt_str(&buf, "check_backpointers_to_extents(): ");
                        bch2_bbpos_to_text(&buf, start);
                        prt_str(&buf, "-");
                        bch2_bbpos_to_text(&buf, end);

                        bch_verbose(c, "%s", buf.buf);
                        printbuf_exit(&buf);
                }

                ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
                if (ret || !bbpos_cmp(end, BBPOS_MAX))
                        break;

                start = bbpos_successor(end);
        }
        bch2_trans_put(trans);

        c->btree_cache.pinned_nodes_leaf_mask = 0;
        c->btree_cache.pinned_nodes_interior_mask = 0;

        bch_err_fn(c, ret);
        return ret;
}

