~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/bcachefs/data_update.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 
  3 #include "bcachefs.h"
  4 #include "alloc_foreground.h"
  5 #include "bkey_buf.h"
  6 #include "btree_update.h"
  7 #include "buckets.h"
  8 #include "compress.h"
  9 #include "data_update.h"
 10 #include "disk_groups.h"
 11 #include "ec.h"
 12 #include "error.h"
 13 #include "extents.h"
 14 #include "io_write.h"
 15 #include "keylist.h"
 16 #include "move.h"
 17 #include "nocow_locking.h"
 18 #include "rebalance.h"
 19 #include "snapshot.h"
 20 #include "subvolume.h"
 21 #include "trace.h"
 22 
 23 static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k)
 24 {
            /*
             * Emit the move_extent_finish trace event with a text rendering of @k.
             * The text is only built when the event is enabled, so the formatting
             * work is skipped otherwise.
             */
 25         if (trace_move_extent_finish_enabled()) {
 26                 struct printbuf buf = PRINTBUF;
 27 
 28                 bch2_bkey_val_to_text(&buf, c, k);
 29                 trace_move_extent_finish(c, buf.buf);
                    /* Done with the text buffer once the event has been emitted */
 30                 printbuf_exit(&buf);
 31         }
 32 }
 33 
 34 static void trace_move_extent_fail2(struct data_update *m,
 35                          struct bkey_s_c new,
 36                          struct bkey_s_c wrote,
 37                          struct bkey_i *insert,
 38                          const char *msg)
 39 {
            /*
             * Emit the move_extent_fail trace event describing why an index update
             * was skipped.
             *
             * @m:      the data update; m->k is printed as "old"
             * @new:    printed as "new" (callers in this file pass the key
             *          currently in the btree)
             * @wrote:  printed as "wrote" (callers in this file pass the key from
             *          the write op's keylist)
             * @insert: key being built for insertion; may be NULL, in which case it
             *          is not printed and no rewrites are reported as found
             * @msg:    reason string placed at the start of the message
             */
 40         struct bch_fs *c = m->op.c;
 41         struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
 42         const union bch_extent_entry *entry;
 43         struct bch_extent_ptr *ptr;
 44         struct extent_ptr_decoded p;
 45         struct printbuf buf = PRINTBUF;
 46         unsigned i, rewrites_found = 0;
 47 
            /* Skip all of the formatting below when the trace event is off */
 48         if (!trace_move_extent_fail_enabled())
 49                 return;
 50 
 51         prt_str(&buf, msg);
 52 
            /*
             * Work out which of the pointers selected by rewrite_ptrs are still
             * present, and not cached, in @insert. Bit i of both masks refers to
             * the i-th pointer of the old extent.
             */
 53         if (insert) {
 54                 i = 0;
 55                 bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry) {
 56                         if (((1U << i) & m->data_opts.rewrite_ptrs) &&
 57                             (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
 58                             !ptr->cached)
 59                                 rewrites_found |= 1U << i;
 60                         i++;
 61                 }
 62         }
 63 
            /* Only bits 0-3 of each mask are printed, lowest bit first */
 64         prt_printf(&buf, "\nrewrite ptrs:   %u%u%u%u",
 65                    (m->data_opts.rewrite_ptrs & (1 << 0)) != 0,
 66                    (m->data_opts.rewrite_ptrs & (1 << 1)) != 0,
 67                    (m->data_opts.rewrite_ptrs & (1 << 2)) != 0,
 68                    (m->data_opts.rewrite_ptrs & (1 << 3)) != 0);
 69 
 70         prt_printf(&buf, "\nrewrites found: %u%u%u%u",
 71                    (rewrites_found & (1 << 0)) != 0,
 72                    (rewrites_found & (1 << 1)) != 0,
 73                    (rewrites_found & (1 << 2)) != 0,
 74                    (rewrites_found & (1 << 3)) != 0);
 75 
            /* Dump each key involved, one per line */
 76         prt_str(&buf, "\nold:    ");
 77         bch2_bkey_val_to_text(&buf, c, old);
 78 
 79         prt_str(&buf, "\nnew:    ");
 80         bch2_bkey_val_to_text(&buf, c, new);
 81 
 82         prt_str(&buf, "\nwrote:  ");
 83         bch2_bkey_val_to_text(&buf, c, wrote);
 84 
 85         if (insert) {
 86                 prt_str(&buf, "\ninsert: ");
 87                 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
 88         }
 89 
 90         trace_move_extent_fail(c, buf.buf);
 91         printbuf_exit(&buf);
 92 }
 93 
 94 static int __bch2_data_update_index_update(struct btree_trans *trans,
 95                                            struct bch_write_op *op)
 96 {
            /*
             * Apply the results of a data update to the btree.
             *
             * For each key in op->insert_keys, look up what is currently in
             * m->btree_id at that position. If it still matches the extent the
             * update was created from (m->k), build a replacement key that combines
             * the existing pointers with the newly written ones and commit it;
             * otherwise count the range as raced and skip it.
             *
             * Returns 0 or an error code. Transaction restarts are handled
             * internally by going around the loop again and are never returned
             * (see the BUG_ON at the end).
             */
 97         struct bch_fs *c = op->c;
 98         struct btree_iter iter;
 99         struct data_update *m =
100                 container_of(op, struct data_update, op);
101         struct keylist *keys = &op->insert_keys;
102         struct bkey_buf _new, _insert;
103         int ret = 0;
104 
105         bch2_bkey_buf_init(&_new);
106         bch2_bkey_buf_init(&_insert);
            /*
             * NOTE(review): presumably sized up front so that bkey_reassemble()
             * into _insert.k below always fits - confirm.
             */
107         bch2_bkey_buf_realloc(&_insert, c, U8_MAX);
108 
            /* Start at the beginning of the first key we were asked to insert */
109         bch2_trans_iter_init(trans, &iter, m->btree_id,
110                              bkey_start_pos(&bch2_keylist_front(keys)->k),
111                              BTREE_ITER_slots|BTREE_ITER_intent);
112 
113         while (1) {
114                 struct bkey_s_c k;
115                 struct bkey_s_c old = bkey_i_to_s_c(m->k.k);
116                 struct bkey_i *insert = NULL;
117                 struct bkey_i_extent *new;
118                 const union bch_extent_entry *entry_c;
119                 union bch_extent_entry *entry;
120                 struct extent_ptr_decoded p;
121                 struct bch_extent_ptr *ptr;
122                 const struct bch_extent_ptr *ptr_c;
123                 struct bpos next_pos;
124                 bool should_check_enospc;
125                 s64 i_sectors_delta = 0, disk_sectors_delta = 0;
126                 unsigned rewrites_found = 0, durability, i;
127 
128                 bch2_trans_begin(trans);
129 
                    /* @k: whatever is in the btree right now at the iterator position */
130                 k = bch2_btree_iter_peek_slot(&iter);
131                 ret = bkey_err(k);
132                 if (ret)
133                         goto err;
134 
135                 new = bkey_i_to_extent(bch2_keylist_front(keys));
136 
                    /* The extent changed since it was read: skip this range */
137                 if (!bch2_extents_match(k, old)) {
138                         trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i),
139                                                 NULL, "no match:");
140                         goto nowork;
141                 }
142 
                    /*
                     * Work on copies: @insert starts as the key currently in the
                     * btree, @new as the key at the front of the keylist.
                     * NOTE(review): the cuts below appear to trim both keys to the
                     * range they share, starting at iter.pos - confirm against
                     * bch2_cut_front()/bch2_cut_back().
                     */
143                 bkey_reassemble(_insert.k, k);
144                 insert = _insert.k;
145 
146                 bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
147                 new = bkey_i_to_extent(_new.k);
148                 bch2_cut_front(iter.pos, &new->k_i);
149 
150                 bch2_cut_front(iter.pos,        insert);
151                 bch2_cut_back(new->k.p,         insert);
152                 bch2_cut_back(insert->k.p,      &new->k_i);
153 
154                 /*
155                  * @old: extent that we read from
156                  * @insert: key that we're going to update, initialized from
157                  * extent currently in btree - same as @old unless we raced with
158                  * other updates
159                  * @new: extent with new pointers that we'll be adding to @insert
160                  *
161                  * First, drop rewrite_ptrs from @new:
162                  */
                    /*
                     * The loop walks the pointers of @old (bit i of rewrite_ptrs is
                     * the i-th pointer) and, for each selected pointer that is still
                     * present and not cached in @insert, marks it cached in @insert
                     * and records it in rewrites_found.
                     */
163                 i = 0;
164                 bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs_c(old), p, entry_c) {
165                         if (((1U << i) & m->data_opts.rewrite_ptrs) &&
166                             (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
167                             !ptr->cached) {
168                                 bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
169                                 rewrites_found |= 1U << i;
170                         }
171                         i++;
172                 }
173 
                    /*
                     * We were asked to rewrite pointers but none of them were found,
                     * and the key in the btree already has at least data_replicas
                     * durability: nothing to do for this range.
                     */
174                 if (m->data_opts.rewrite_ptrs &&
175                     !rewrites_found &&
176                     bch2_bkey_durability(c, k) >= m->op.opts.data_replicas) {
177                         trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "no rewrites found:");
178                         goto nowork;
179                 }
180 
181                 /*
182                  * A replica that we just wrote might conflict with a replica
183                  * that we want to keep, due to racing with another move:
184                  */
                    /*
                     * A pointer in @new conflicts when @insert has a non-cached
                     * pointer to the same device. The scan restarts from the top
                     * after every drop. NOTE(review): presumably because dropping a
                     * pointer invalidates the iteration - confirm.
                     */
185 restart_drop_conflicting_replicas:
186                 extent_for_each_ptr(extent_i_to_s(new), ptr)
187                         if ((ptr_c = bch2_bkey_has_device_c(bkey_i_to_s_c(insert), ptr->dev)) &&
188                             !ptr_c->cached) {
189                                 bch2_bkey_drop_ptr_noerror(bkey_i_to_s(&new->k_i), ptr);
190                                 goto restart_drop_conflicting_replicas;
191                         }
192 
                    /* Every pointer in @new was dropped above: nothing left to add */
193                 if (!bkey_val_u64s(&new->k)) {
194                         trace_move_extent_fail2(m, k, bkey_i_to_s_c(&new->k_i), insert, "new replicas conflicted:");
195                         goto nowork;
196                 }
197 
198                 /* Now, drop pointers that conflict with what we just wrote: */
199                 extent_for_each_ptr_decode(extent_i_to_s(new), p, entry)
200                         if ((ptr = bch2_bkey_has_device(bkey_i_to_s(insert), p.ptr.dev)))
201                                 bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr);
202 
                    /* Combined durability of what we keep plus what we wrote */
203                 durability = bch2_bkey_durability(c, bkey_i_to_s_c(insert)) +
204                         bch2_bkey_durability(c, bkey_i_to_s_c(&new->k_i));
205 
206                 /* Now, drop excess replicas: */
                    /*
                     * "Drop" here means marking a non-cached pointer in @insert as
                     * cached whenever the durability left over would still be at
                     * least data_replicas; the scan restarts after each change.
                     * NOTE(review): the RCU read section is presumably required by
                     * bch2_extent_ptr_durability() - confirm.
                     * NOTE(review): the loop iterates @insert's pointers but passes
                     * old.k as the key argument - confirm this is intended.
                     */
207                 rcu_read_lock();
208 restart_drop_extra_replicas:
209                 bkey_for_each_ptr_decode(old.k, bch2_bkey_ptrs(bkey_i_to_s(insert)), p, entry) {
210                         unsigned ptr_durability = bch2_extent_ptr_durability(c, &p);
211 
212                         if (!p.ptr.cached &&
213                             durability - ptr_durability >= m->op.opts.data_replicas) {
214                                 durability -= ptr_durability;
215 
216                                 bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
217                                 goto restart_drop_extra_replicas;
218                         }
219                 }
220                 rcu_read_unlock();
221 
222                 /* Finally, add the pointers we just wrote: */
223                 extent_for_each_ptr_decode(extent_i_to_s(new), p, entry)
224                         bch2_extent_ptr_decoded_append(insert, &p);
225 
226                 bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
227                 bch2_extent_normalize(c, bkey_i_to_s(insert));
228 
                    /*
                     * Fills in should_check_enospc and the two sector deltas that
                     * drive the reservation top-up below.
                     */
229                 ret = bch2_sum_sector_overwrites(trans, &iter, insert,
230                                                  &should_check_enospc,
231                                                  &i_sectors_delta,
232                                                  &disk_sectors_delta);
233                 if (ret)
234                         goto err;
235 
                    /*
                     * The update needs more disk sectors than are currently
                     * reserved: add the difference. The reservation is made NOFAIL
                     * when should_check_enospc is false.
                     */
236                 if (disk_sectors_delta > (s64) op->res.sectors) {
237                         ret = bch2_disk_reservation_add(c, &op->res,
238                                                 disk_sectors_delta - op->res.sectors,
239                                                 !should_check_enospc
240                                                 ? BCH_DISK_RESERVATION_NOFAIL : 0);
241                         if (ret)
242                                 goto out;
243                 }
244 
                    /*
                     * Remember where to continue; the iterator is only moved there
                     * after a successful commit.
                     */
245                 next_pos = insert->k.p;
246 
247                 /*
248                  * Check for nonce offset inconsistency:
249                  * This is debug code - we've been seeing this bug rarely, and
250                  * it's been hard to reproduce, so this should give us some more
251                  * information when it does occur:
252                  */
253                 struct printbuf err = PRINTBUF;
254                 int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err);
                    /* Only the return value is used; the message text is discarded */
255                 printbuf_exit(&err);
256 
257                 if (invalid) {
258                         struct printbuf buf = PRINTBUF;
259 
260                         prt_str(&buf, "about to insert invalid key in data update path");
261                         prt_str(&buf, "\nold: ");
262                         bch2_bkey_val_to_text(&buf, c, old);
263                         prt_str(&buf, "\nk:   ");
264                         bch2_bkey_val_to_text(&buf, c, k);
265                         prt_str(&buf, "\nnew: ");
266                         bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
267 
268                         bch2_print_string_as_lines(KERN_ERR, buf.buf);
269                         printbuf_exit(&buf);
270 
                            /*
                             * NOTE(review): ret is still 0 at this point (every
                             * earlier non-zero ret has already jumped away), so
                             * the function returns success after calling
                             * bch2_fatal_error() - confirm whether an error code
                             * should be set here.
                             */
271                         bch2_fatal_error(c);
272                         goto out;
273                 }
274 
                    /* Optional trace of the three keys; built only when enabled */
275                 if (trace_data_update_enabled()) {
276                         struct printbuf buf = PRINTBUF;
277 
278                         prt_str(&buf, "\nold: ");
279                         bch2_bkey_val_to_text(&buf, c, old);
280                         prt_str(&buf, "\nk:   ");
281                         bch2_bkey_val_to_text(&buf, c, k);
282                         prt_str(&buf, "\nnew: ");
283                         bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
284 
285                         trace_data_update(c, buf.buf);
286                         printbuf_exit(&buf);
287                 }
288 
                    /*
                     * Chained with the GNU "?:" operator: each step runs only if
                     * every previous one returned 0, and ret ends up as the first
                     * non-zero result (or 0 if all succeeded).
                     */
289                 ret =   bch2_insert_snapshot_whiteouts(trans, m->btree_id,
290                                                 k.k->p, bkey_start_pos(&insert->k)) ?:
291                         bch2_insert_snapshot_whiteouts(trans, m->btree_id,
292                                                 k.k->p, insert->k.p) ?:
293                         bch2_bkey_set_needs_rebalance(c, insert, &op->opts) ?:
294                         bch2_trans_update(trans, &iter, insert,
295                                 BTREE_UPDATE_internal_snapshot_node) ?:
296                         bch2_trans_commit(trans, &op->res,
297                                 NULL,
298                                 BCH_TRANS_COMMIT_no_check_rw|
299                                 BCH_TRANS_COMMIT_no_enospc|
300                                 m->data_opts.btree_insert_flags);
                    /* Committed: move past this range and account for it */
301                 if (!ret) {
302                         bch2_btree_iter_set_pos(&iter, next_pos);
303 
304                         this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size);
305                         trace_move_extent_finish2(c, bkey_i_to_s_c(&new->k_i));
306                 }
307 err:
                    /*
                     * A transaction restart is not a failure: clear it and go
                     * around again. This function only moves the iterator on a
                     * successful commit or in the nowork path. Any other error
                     * ends the loop.
                     */
308                 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
309                         ret = 0;
310                 if (ret)
311                         break;
312 next:
                    /*
                     * Pop keys from the front of the keylist for as long as the
                     * iterator position is at or past the front key's end position;
                     * we are finished once the list is empty.
                     */
313                 while (bkey_ge(iter.pos, bch2_keylist_front(keys)->k.p)) {
314                         bch2_keylist_pop_front(keys);
315                         if (bch2_keylist_empty(keys))
316                                 goto out;
317                 }
318                 continue;
319 nowork:
                    /*
                     * This range was skipped: record it in the raced counters (when
                     * stats are attached) and in the move_extent_fail event count,
                     * then step the iterator forward.
                     */
320                 if (m->stats) {
321                         BUG_ON(k.k->p.offset <= iter.pos.offset);
322                         atomic64_inc(&m->stats->keys_raced);
323                         atomic64_add(k.k->p.offset - iter.pos.offset,
324                                      &m->stats->sectors_raced);
325                 }
326 
327                 count_event(c, move_extent_fail);
328 
329                 bch2_btree_iter_advance(&iter);
330                 goto next;
331         }
332 out:
333         bch2_trans_iter_exit(trans, &iter);
334         bch2_bkey_buf_exit(&_insert, c);
335         bch2_bkey_buf_exit(&_new, c);
            /* Restarts are consumed inside the loop and must never escape */
336         BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
337         return ret;
338 }
339 
340 int bch2_data_update_index_update(struct bch_write_op *op)
341 {
            /*
             * Run __bch2_data_update_index_update() for @op through
             * bch2_trans_run() on op->c and return its result. `trans` is not
             * declared in this function; it is presumably supplied by
             * bch2_trans_run() - confirm against its definition.
             */
342         return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op));
343 }
344 
345 void bch2_data_update_read_done(struct data_update *m,
346                                 struct bch_extent_crc_unpacked crc)
347 {
            /*
             * Start the write half of data update @m.
             *
             * @crc is stored in the write op and its compressed_size sets the
             * length of the write bio, after which bch2_write is started on the
             * op's closure.
             */
348         /* write bio must own pages: */
349         BUG_ON(!m->op.wbio.bio.bi_vcnt);
350 
351         m->op.crc = crc;
            /* NOTE(review): "<< 9" presumably converts 512-byte sectors to bytes */
352         m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9;
353 
354         closure_call(&m->op.cl, bch2_write, NULL, NULL);
355 }
356 
357 void bch2_data_update_exit(struct data_update *update)
358 {
            /*
             * Release what a data update holds: for every pointer in update->k,
             * the nocow bucket lock (only when the nocow_enabled option is set) and
             * one device reference; then the key buffer, the disk reservation and
             * the write bio's pages.
             */
359         struct bch_fs *c = update->op.c;
360         struct bkey_ptrs_c ptrs =
361                 bch2_bkey_ptrs_c(bkey_i_to_s_c(update->k.k));
362 
363         bkey_for_each_ptr(ptrs, ptr) {
                    /*
                     * NOTE(review): bch2_dev_have_ref() presumably looks up a
                     * device we already hold a reference on (taken with
                     * bch2_dev_tryget() in bch2_data_update_init()); that
                     * reference is dropped by bch2_dev_put() below.
                     */
364                 struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
365                 if (c->opts.nocow_enabled)
366                         bch2_bucket_nocow_unlock(&c->nocow_locks,
367                                                  PTR_BUCKET_POS(ca, ptr), 0);
368                 bch2_dev_put(ca);
369         }
370 
371         bch2_bkey_buf_exit(&update->k, c);
372         bch2_disk_reservation_put(c, &update->op.res);
373         bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
374 }
375 
376 static void bch2_update_unwritten_extent(struct btree_trans *trans,
377                                   struct data_update *update)
378 {
            /*
             * Data update for an unwritten extent (called from
             * bch2_data_update_init() when bkey_extent_is_unwritten(k) is true).
             *
             * No write is issued here. Instead the loop repeatedly allocates
             * space, builds an extent whose pointers are all flagged unwritten,
             * and inserts it via __bch2_data_update_index_update(). It stops when
             * the bio has no sectors left, the extent in the btree no longer
             * matches update->k, or an error occurs.
             */
379         struct bch_fs *c = update->op.c;
380         struct bio *bio = &update->op.wbio.bio;
381         struct bkey_i_extent *e;
382         struct write_point *wp;
383         struct closure cl;
384         struct btree_iter iter;
385         struct bkey_s_c k;
386         int ret;
387 
388         closure_init_stack(&cl);
389         bch2_keylist_init(&update->op.insert_keys, update->op.inline_keys);
390 
391         while (bio_sectors(bio)) {
392                 unsigned sectors = bio_sectors(bio);
393 
394                 bch2_trans_begin(trans);
395 
                    /* Re-read the key at the current position on every iteration */
396                 bch2_trans_iter_init(trans, &iter, update->btree_id, update->op.pos,
397                                      BTREE_ITER_slots);
398                 ret = lockrestart_do(trans, ({
399                         k = bch2_btree_iter_peek_slot(&iter);
400                         bkey_err(k);
401                 }));
402                 bch2_trans_iter_exit(trans, &iter);
403 
                    /* Stop on lookup error, or once the extent has changed under us */
404                 if (ret || !bch2_extents_match(k, bkey_i_to_s_c(update->k.k)))
405                         break;
406 
                    /* Build the new extent in place at the top of the keylist */
407                 e = bkey_extent_init(update->op.insert_keys.top);
408                 e->k.p = update->op.pos;
409 
410                 ret = bch2_alloc_sectors_start_trans(trans,
411                                 update->op.target,
412                                 false,
413                                 update->op.write_point,
414                                 &update->op.devs_have,
415                                 update->op.nr_replicas,
416                                 update->op.nr_replicas,
417                                 update->op.watermark,
418                                 0, &cl, &wp);
                    /*
                     * Allocation would block: drop btree locks, wait on the
                     * closure, then retry the whole iteration.
                     */
419                 if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) {
420                         bch2_trans_unlock(trans);
421                         closure_sync(&cl);
422                         continue;
423                 }
424 
425                 bch_err_fn_ratelimited(c, ret);
426 
                    /*
                     * NOTE(review): this early return skips the closure_sync()
                     * check at the bottom of the function - confirm that @cl
                     * cannot still be waited on here.
                     */
427                 if (ret)
428                         return;
429 
                    /* Limit this round to what the write point has room for */
430                 sectors = min(sectors, wp->sectors_free);
431 
432                 bch2_key_resize(&e->k, sectors);
433 
434                 bch2_open_bucket_get(c, wp, &update->op.open_buckets);
435                 bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
436                 bch2_alloc_sectors_done(c, wp);
437 
                    /* Consume the same amount from the bio and advance the position */
438                 bio_advance(bio, sectors << 9);
439                 update->op.pos.offset += sectors;
440 
441                 extent_for_each_ptr(extent_i_to_s(e), ptr)
442                         ptr->unwritten = true;
443                 bch2_keylist_push(&update->op.insert_keys);
444 
445                 ret = __bch2_data_update_index_update(trans, &update->op);
446 
447                 bch2_open_buckets_put(c, &update->op.open_buckets);
448 
449                 if (ret)
450                         break;
451         }
452 
            /*
             * NOTE(review): a remaining count other than 1 presumably means
             * something still holds a ref on @cl; btree locks are dropped before
             * waiting for it - confirm against the closure API.
             */
453         if (closure_nr_remaining(&cl) != 1) {
454                 bch2_trans_unlock(trans);
455                 closure_sync(&cl);
456         }
457 }
458 
459 void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
460                                    struct bch_io_opts *io_opts,
461                                    struct data_update_opts *data_opts)
462 {
            /*
             * Print a description of @data_opts to @out as "label:<tab>value"
             * entries: the rewrite_ptrs and kill_ptrs masks in base 2, the target,
             * the background compression option taken from @io_opts, and
             * extra_replicas. Every entry except the last is followed by a
             * newline.
             */
463         printbuf_tabstop_push(out, 20);
464         prt_str(out, "rewrite ptrs:\t");
465         bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
466         prt_newline(out);
467 
468         prt_str(out, "kill ptrs:\t");
469         bch2_prt_u64_base2(out, data_opts->kill_ptrs);
470         prt_newline(out);
471 
472         prt_str(out, "target:\t");
473         bch2_target_to_text(out, c, data_opts->target);
474         prt_newline(out);
475 
476         prt_str(out, "compression:\t");
477         bch2_compression_opt_to_text(out, background_compression(*io_opts));
478         prt_newline(out);
479 
480         prt_str(out, "extra replicas:\t");
481         prt_u64(out, data_opts->extra_replicas);
482 }
483 
484 void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
485 {
            /*
             * Print data update @m to @out: first the extent it operates on
             * (m->k), then on a new line its options via
             * bch2_data_update_opts_to_text().
             */
486         bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
487         prt_newline(out);
488         bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
489 }
490 
491 int bch2_extent_drop_ptrs(struct btree_trans *trans,
492                           struct btree_iter *iter,
493                           struct bkey_s_c k,
494                           struct data_update_opts data_opts)
495 {
            /*
             * Make a mutable copy of @k, remove the pointers selected by
             * data_opts.kill_ptrs (bit i selects the i-th pointer), and commit the
             * result through @iter.
             *
             * @data_opts is passed by value, so clearing bits in kill_ptrs below
             * does not affect the caller's copy.
             *
             * Returns 0 on success, or the first error from making the copy,
             * relocking, the update, or the commit.
             */
496         struct bch_fs *c = trans->c;
497         struct bkey_i *n;
498         int ret;
499 
500         n = bch2_bkey_make_mut_noupdate(trans, k);
501         ret = PTR_ERR_OR_ZERO(n);
502         if (ret)
503                 return ret;
504 
            /*
             * Handle one set bit per pass, highest index first: the drop
             * condition "i++ == drop" counts pointers from 0 and is true only
             * for the pointer at index @drop. NOTE(review): going from the
             * highest index down presumably keeps the indices of the remaining
             * pointers to drop unchanged - confirm.
             */
505         while (data_opts.kill_ptrs) {
506                 unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
507 
508                 bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
509                 data_opts.kill_ptrs ^= 1U << drop;
510         }
511 
512         /*
513          * If the new extent no longer has any pointers, bch2_extent_normalize()
514          * will do the appropriate thing with it (turning it into a
515          * KEY_TYPE_error key, or just a discard if it was a cached extent)
516          */
517         bch2_extent_normalize(c, bkey_i_to_s(n));
518 
519         /*
520          * Since we're not inserting through an extent iterator
521          * (BTREE_ITER_all_snapshots iterators aren't extent iterators),
522          * we aren't using the extent overwrite path to delete, we're
523          * just using the normal key deletion path:
524          */
525         if (bkey_deleted(&n->k) && !(iter->flags & BTREE_ITER_is_extents))
526                 n->k.size = 0;
527 
            /* Each step runs only if the previous one returned 0 */
528         return bch2_trans_relock(trans) ?:
529                 bch2_trans_update(trans, iter, n, BTREE_UPDATE_internal_snapshot_node) ?:
530                 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
531 }
532 
533 int bch2_data_update_init(struct btree_trans *trans,
534                           struct btree_iter *iter,
535                           struct moving_context *ctxt,
536                           struct data_update *m,
537                           struct write_point_specifier wp,
538                           struct bch_io_opts io_opts,
539                           struct data_update_opts data_opts,
540                           enum btree_id btree_id,
541                           struct bkey_s_c k)
542 {
            /*
             * Set up data update @m for extent @k in @btree_id.
             *
             * @iter:      may be NULL (see the "just a promote" comment below);
             *             only used when pointers are dropped without rewriting
             * @ctxt:      may be NULL; selects how nocow bucket locks are taken
             *             and provides m->stats
             * @wp:        write point stored in the write op
             * @io_opts:   IO options used to initialise the write op
             * @data_opts: which pointers to rewrite/kill, target, flags
             *
             * Returns 0 when @m is initialised and still owns its resources
             * (released later by bch2_data_update_exit()). Returns
             * -BCH_ERR_data_update_done when nothing remains for the caller to
             * do, including the cases handled entirely in here (pointers only
             * dropped, unwritten extents). Returns another error code on
             * failure. On every non-zero return the resources taken in here
             * have already been released.
             */
543         struct bch_fs *c = trans->c;
544         struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
545         const union bch_extent_entry *entry;
546         struct extent_ptr_decoded p;
547         unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
548         unsigned ptrs_locked = 0;
549         int ret = 0;
550 
551         /*
552          * fs is corrupt - we have a key for a snapshot node that doesn't exist,
553          * and we have to check for this because we go rw before repairing the
554          * snapshots table - just skip it, we can move it later.
555          */
556         if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot)))
557                 return -BCH_ERR_data_update_done;
558 
            /* Keep a private copy of the extent we are operating on */
559         bch2_bkey_buf_init(&m->k);
560         bch2_bkey_buf_reassemble(&m->k, c, k);
561         m->btree_id     = btree_id;
562         m->data_opts    = data_opts;
563         m->ctxt         = ctxt;
564         m->stats        = ctxt ? ctxt->stats : NULL;
565 
566         bch2_write_op_init(&m->op, c, io_opts);
567         m->op.pos       = bkey_start_pos(k.k);
568         m->op.version   = k.k->version;
569         m->op.target    = data_opts.target;
570         m->op.write_point = wp;
            /* Accumulated in the pointer loop below, then recomputed after it */
571         m->op.nr_replicas = 0;
572         m->op.flags     |= BCH_WRITE_PAGES_STABLE|
573                 BCH_WRITE_PAGES_OWNED|
574                 BCH_WRITE_DATA_ENCODED|
575                 BCH_WRITE_MOVE|
576                 m->data_opts.write_flags;
577         m->op.compression_opt   = background_compression(io_opts);
578         m->op.watermark         = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;
579 
            /*
             * Take a reference on the device of every pointer. If any tryget
             * fails, drop the references already taken (all pointers before the
             * failing one) and give up.
             */
580         bkey_for_each_ptr(ptrs, ptr) {
581                 if (!bch2_dev_tryget(c, ptr->dev)) {
582                         bkey_for_each_ptr(ptrs, ptr2) {
583                                 if (ptr2 == ptr)
584                                         break;
585                                 bch2_dev_put(bch2_dev_have_ref(c, ptr2->dev));
586                         }
587                         return -BCH_ERR_data_update_done;
588                 }
589         }
590 
591         unsigned durability_have = 0, durability_removing = 0;
592 
            /*
             * Walk the pointers; bit i of rewrite_ptrs/kill_ptrs/ptrs_locked
             * refers to the i-th pointer. This collects durability totals,
             * copies encryption/compression properties to the write op, and
             * takes the nocow bucket locks.
             */
593         i = 0;
594         bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
595                 struct bch_dev *ca = bch2_dev_have_ref(c, p.ptr.dev);
596                 struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr);
597                 bool locked;
598 
599                 rcu_read_lock();
600                 if (((1U << i) & m->data_opts.rewrite_ptrs)) {
                            /* Pointer is to be rewritten */
601                         BUG_ON(p.ptr.cached);
602 
603                         if (crc_is_compressed(p.crc))
604                                 reserve_sectors += k.k->size;
605 
606                         m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
607                         durability_removing += bch2_extent_ptr_desired_durability(c, &p);
608                 } else if (!p.ptr.cached &&
609                            !((1U << i) & m->data_opts.kill_ptrs)) {
                            /* Pointer is kept: neither rewritten, cached, nor killed */
610                         bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
611                         durability_have += bch2_extent_ptr_durability(c, &p);
612                 }
613                 rcu_read_unlock();
614 
615                 /*
616                  * op->csum_type is normally initialized from the fs/file's
617                  * current options - but if an extent is encrypted, we require
618                  * that it stays encrypted:
619                  */
620                 if (bch2_csum_type_is_encryption(p.crc.csum_type)) {
621                         m->op.nonce     = p.crc.nonce + p.crc.offset;
622                         m->op.csum_type = p.crc.csum_type;
623                 }
624 
625                 if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
626                         m->op.incompressible = true;
627 
628                 if (c->opts.nocow_enabled) {
629                         if (ctxt) {
                                    /*
                                     * Wait until either the trylock succeeds
                                     * or ctxt->ios is empty; if the wait ended
                                     * without the lock, fall back to
                                     * bch2_bucket_nocow_lock().
                                     */
630                                 move_ctxt_wait_event(ctxt,
631                                                 (locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
632                                                                           bucket, 0)) ||
633                                                 list_empty(&ctxt->ios));
634 
635                                 if (!locked)
636                                         bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0);
637                         } else {
                                    /* Without a moving context, fail instead of waiting */
638                                 if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) {
639                                         ret = -BCH_ERR_nocow_lock_blocked;
640                                         goto err;
641                                 }
642                         }
                            /* Remember the lock so the err path can undo it */
643                         ptrs_locked |= (1U << i);
644                 }
645 
646                 i++;
647         }
648 
            /*
             * How much durability is still missing beyond what the kept
             * pointers provide, clamped at zero.
             */
649         unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have));
650 
651         /*
652          * If current extent durability is less than io_opts.data_replicas,
653          * we're not trying to rereplicate the extent up to data_replicas here -
654          * unless extra_replicas was specified
655          *
656          * Increasing replication is an explicit operation triggered by
657          * rereplicate, currently, so that users don't get an unexpected -ENOSPC
658          */
            /*
             * Not a cached write and no more durability needed: the pointers
             * that were to be rewritten are simply killed instead, with no new
             * write.
             */
659         if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) &&
660             !durability_required) {
661                 m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
662                 m->data_opts.rewrite_ptrs = 0;
663                 /* if iter == NULL, it's just a promote */
664                 if (iter)
665                         ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts);
666                 goto done;
667         }
668 
            /*
             * Replace the per-pointer running total from the loop above: write
             * no more durability than is being removed or still required, plus
             * any explicitly requested extra replicas.
             */
669         m->op.nr_replicas = min(durability_removing, durability_required) +
670                 m->data_opts.extra_replicas;
671 
672         /*
673          * If device(s) were set to durability=0 after data was written to them
674          * we can end up with a durability=0 extent, and the normal algorithm
675          * that tries not to increase durability doesn't work:
676          */
677         if (!(durability_have + durability_removing))
678                 m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
679 
            /* Should not happen: warn with a dump of the update and bail out */
680         if (!m->op.nr_replicas) {
681                 struct printbuf buf = PRINTBUF;
682 
683                 bch2_data_update_to_text(&buf, m);
684                 WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf);
685                 printbuf_exit(&buf);
686                 ret = -BCH_ERR_data_update_done;
687                 goto done;
688         }
689 
690         m->op.nr_replicas_required = m->op.nr_replicas;
691 
            /*
             * The reservation may fail only when extra replicas were requested;
             * otherwise it is made NOFAIL.
             */
692         if (reserve_sectors) {
693                 ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
694                                 m->data_opts.extra_replicas
695                                 ? 0
696                                 : BCH_DISK_RESERVATION_NOFAIL);
697                 if (ret)
698                         goto err;
699         }
700 
            /* Unwritten extents are handled completely in here, then we are done */
701         if (bkey_extent_is_unwritten(k)) {
702                 bch2_update_unwritten_extent(trans, m);
703                 goto done;
704         }
705 
706         return 0;
707 err:
            /*
             * Failure: release only the nocow locks actually taken (tracked in
             * ptrs_locked), drop every device reference, and free the key
             * buffer and bio pages.
             */
708         i = 0;
709         bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
710                 struct bch_dev *ca = bch2_dev_have_ref(c, p.ptr.dev);
711                 struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr);
712                 if ((1U << i) & ptrs_locked)
713                         bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0);
714                 bch2_dev_put(ca);
715                 i++;
716         }
717 
718         bch2_bkey_buf_exit(&m->k, c);
719         bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
720         return ret;
721 done:
            /*
             * Nothing left for the caller: tear the update down and return
             * either the error recorded in ret or -BCH_ERR_data_update_done.
             */
722         bch2_data_update_exit(m);
723         return ret ?: -BCH_ERR_data_update_done;
724 }
725 
726 void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
727 {
            /*
             * Adjust @opts for extent @k: any pointer that is selected in
             * rewrite_ptrs but is cached is moved to kill_ptrs instead. Bit i of
             * both masks refers to the i-th pointer of @k.
             */
728         struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
729         unsigned i = 0;
730 
731         bkey_for_each_ptr(ptrs, ptr) {
732                 if ((opts->rewrite_ptrs & (1U << i)) && ptr->cached) {
733                         opts->kill_ptrs |= 1U << i;
                            /* The bit is known to be set here, so XOR clears it */
734                         opts->rewrite_ptrs ^= 1U << i;
735                 }
736 
737                 i++;
738         }
739 }
740 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php