
TOMOYO Linux Cross Reference
Linux/fs/bcachefs/btree_iter.c

Diff markup

Differences between /fs/bcachefs/btree_iter.c (Architecture m68k) and /fs/bcachefs/btree_iter.c (Architecture ppc): the file is identical for both architectures, so the single listing below shows the common source.


// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_journal_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "debug.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "journal_io.h"
#include "replicas.h"
#include "snapshot.h"
#include "trace.h"

#include <linux/random.h>
#include <linux/prefetch.h>

static inline void btree_path_list_remove(struct btree_trans *, struct btree_path *);
static inline void btree_path_list_add(struct btree_trans *,
                        btree_path_idx_t, btree_path_idx_t);

static inline unsigned long btree_iter_ip_allocated(struct btree_iter *iter)
{
#ifdef TRACK_PATH_ALLOCATED
        return iter->ip_allocated;
#else
        return 0;
#endif
}

static btree_path_idx_t btree_path_alloc(struct btree_trans *, btree_path_idx_t);
static void bch2_trans_srcu_lock(struct btree_trans *);

static inline int __btree_path_cmp(const struct btree_path *l,
                                   enum btree_id        r_btree_id,
                                   bool                 r_cached,
                                   struct bpos          r_pos,
                                   unsigned             r_level)
{
        /*
         * Must match lock ordering as defined by __bch2_btree_node_lock:
         */
        return   cmp_int(l->btree_id,   r_btree_id) ?:
                 cmp_int((int) l->cached,       (int) r_cached) ?:
                 bpos_cmp(l->pos,       r_pos) ?:
                -cmp_int(l->level,      r_level);
}

static inline int btree_path_cmp(const struct btree_path *l,
                                 const struct btree_path *r)
{
        return __btree_path_cmp(l, r->btree_id, r->cached, r->pos, r->level);
}

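As a standalone illustration of the chained cmp_int()/bpos_cmp() comparison above (btree id, then cached, then position, then level, with level negated so a higher level sorts first), here is a minimal userspace sketch; struct pos, struct path_key and the helpers are simplified stand-ins for illustration, not the bcachefs types.

#include <assert.h>
#include <stdbool.h>

/* Simplified stand-in for struct bpos: ordered as (inode, offset, snapshot) */
struct pos { unsigned long long inode, offset; unsigned snapshot; };

static int cmp_u64(unsigned long long l, unsigned long long r)
{
        return (l > r) - (l < r);
}

static int pos_cmp(struct pos l, struct pos r)
{
        return cmp_u64(l.inode, r.inode) ?:
               cmp_u64(l.offset, r.offset) ?:
               cmp_u64(l.snapshot, r.snapshot);
}

/* Simplified stand-in for the fields __btree_path_cmp() looks at */
struct path_key { int btree_id; bool cached; struct pos pos; unsigned level; };

/* Same shape as __btree_path_cmp(): each comparison only breaks ties from
 * the previous one, and level is negated so that, everything else equal,
 * the higher level compares as smaller. */
static int path_key_cmp(struct path_key l, struct path_key r)
{
        return  cmp_u64(l.btree_id, r.btree_id) ?:
                cmp_u64(l.cached,   r.cached)   ?:
                pos_cmp(l.pos,      r.pos)      ?:
               -cmp_u64(l.level,    r.level);
}

int main(void)
{
        struct path_key parent = { .btree_id = 0, .pos = { 1, 0, 1 }, .level = 1 };
        struct path_key leaf   = { .btree_id = 0, .pos = { 1, 0, 1 }, .level = 0 };

        /* equal btree id, cached flag and position: the higher level sorts first */
        assert(path_key_cmp(parent, leaf) < 0);
        return 0;
}
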
static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
{
        /* Are we iterating over keys in all snapshots? */
        if (iter->flags & BTREE_ITER_all_snapshots) {
                p = bpos_successor(p);
        } else {
                p = bpos_nosnap_successor(p);
                p.snapshot = iter->snapshot;
        }

        return p;
}

static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
{
        /* Are we iterating over keys in all snapshots? */
        if (iter->flags & BTREE_ITER_all_snapshots) {
                p = bpos_predecessor(p);
        } else {
                p = bpos_nosnap_predecessor(p);
                p.snapshot = iter->snapshot;
        }

        return p;
}

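A minimal sketch of the two successor flavours used above, under the assumption that bpos_successor() advances the full (inode, offset, snapshot) tuple with carry while bpos_nosnap_successor() advances only (inode, offset); struct mpos and both helpers are simplified models written for this illustration, not the kernel's bpos implementation.

#include <assert.h>
#include <stdint.h>

/* Simplified model of struct bpos: ordered as (inode, offset, snapshot). */
struct mpos { uint64_t inode, offset; uint32_t snapshot; };

/* Model of bpos_successor(): increment the lowest field, carrying upward. */
static struct mpos mpos_successor(struct mpos p)
{
        if (++p.snapshot)
                return p;
        if (++p.offset)
                return p;
        ++p.inode;
        return p;
}

/* Model of bpos_nosnap_successor(): ignore the snapshot field entirely. */
static struct mpos mpos_nosnap_successor(struct mpos p)
{
        p.snapshot = 0;
        if (++p.offset)
                return p;
        ++p.inode;
        return p;
}

int main(void)
{
        struct mpos p = { .inode = 1, .offset = 5, .snapshot = UINT32_MAX };

        /* all-snapshots iteration: snapshot overflow carries into offset */
        struct mpos s = mpos_successor(p);
        assert(s.offset == 6 && s.snapshot == 0);

        /* snapshot-filtered iteration: step to the next (inode, offset), then
         * pin the snapshot back to the iterator's snapshot, as bkey_successor()
         * does above */
        struct mpos n = mpos_nosnap_successor(p);
        n.snapshot = 42;
        assert(n.offset == 6 && n.snapshot == 42);
        return 0;
}
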
static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
{
        struct bpos pos = iter->pos;

        if ((iter->flags & BTREE_ITER_is_extents) &&
            !bkey_eq(pos, POS_MAX))
                pos = bkey_successor(iter, pos);
        return pos;
}

static inline bool btree_path_pos_before_node(struct btree_path *path,
                                              struct btree *b)
{
        return bpos_lt(path->pos, b->data->min_key);
}

static inline bool btree_path_pos_after_node(struct btree_path *path,
                                             struct btree *b)
{
        return bpos_gt(path->pos, b->key.k.p);
}

static inline bool btree_path_pos_in_node(struct btree_path *path,
                                          struct btree *b)
{
        return path->btree_id == b->c.btree_id &&
                !btree_path_pos_before_node(path, b) &&
                !btree_path_pos_after_node(path, b);
}

/* Btree iterator: */

#ifdef CONFIG_BCACHEFS_DEBUG

static void bch2_btree_path_verify_cached(struct btree_trans *trans,
                                          struct btree_path *path)
{
        struct bkey_cached *ck;
        bool locked = btree_node_locked(path, 0);

        if (!bch2_btree_node_relock(trans, path, 0))
                return;

        ck = (void *) path->l[0].b;
        BUG_ON(ck->key.btree_id != path->btree_id ||
               !bkey_eq(ck->key.pos, path->pos));

        if (!locked)
                btree_node_unlock(trans, path, 0);
}

static void bch2_btree_path_verify_level(struct btree_trans *trans,
                                struct btree_path *path, unsigned level)
{
        struct btree_path_level *l;
        struct btree_node_iter tmp;
        bool locked;
        struct bkey_packed *p, *k;
        struct printbuf buf1 = PRINTBUF;
        struct printbuf buf2 = PRINTBUF;
        struct printbuf buf3 = PRINTBUF;
        const char *msg;

        if (!bch2_debug_check_iterators)
                return;

        l       = &path->l[level];
        tmp     = l->iter;
        locked  = btree_node_locked(path, level);

        if (path->cached) {
                if (!level)
                        bch2_btree_path_verify_cached(trans, path);
                return;
        }

        if (!btree_path_node(path, level))
                return;

        if (!bch2_btree_node_relock_notrace(trans, path, level))
                return;

        BUG_ON(!btree_path_pos_in_node(path, l->b));

        bch2_btree_node_iter_verify(&l->iter, l->b);

        /*
         * For interior nodes, the iterator will have skipped past deleted keys:
         */
        p = level
                ? bch2_btree_node_iter_prev(&tmp, l->b)
                : bch2_btree_node_iter_prev_all(&tmp, l->b);
        k = bch2_btree_node_iter_peek_all(&l->iter, l->b);

        if (p && bkey_iter_pos_cmp(l->b, p, &path->pos) >= 0) {
                msg = "before";
                goto err;
        }

        if (k && bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
                msg = "after";
                goto err;
        }

        if (!locked)
                btree_node_unlock(trans, path, level);
        return;
err:
        bch2_bpos_to_text(&buf1, path->pos);

        if (p) {
                struct bkey uk = bkey_unpack_key(l->b, p);

                bch2_bkey_to_text(&buf2, &uk);
        } else {
                prt_printf(&buf2, "(none)");
        }

        if (k) {
                struct bkey uk = bkey_unpack_key(l->b, k);

                bch2_bkey_to_text(&buf3, &uk);
        } else {
                prt_printf(&buf3, "(none)");
        }

        panic("path should be %s key at level %u:\n"
              "path pos %s\n"
              "prev key %s\n"
              "cur  key %s\n",
              msg, level, buf1.buf, buf2.buf, buf3.buf);
}

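The invariant bch2_btree_path_verify_level() enforces is essentially that the node iterator sits at the first key at or after the path's search position: the previous key must compare below the position ("before" failure otherwise) and the current key must not compare below it ("after" failure otherwise). Here is a minimal sketch of that invariant over a plain sorted array; iter_position_valid() is a hypothetical helper written for this illustration only.

#include <assert.h>
#include <stddef.h>

/* Hypothetical model: keys are plain integers, the "iterator" is an index
 * into a sorted array.  The iterator is valid for search position pos iff
 * every key before it is < pos and the key it points at (if any) is >= pos,
 * mirroring the "before"/"after" checks in bch2_btree_path_verify_level(). */
static int iter_position_valid(const int *keys, size_t nr, size_t idx, int pos)
{
        const int *prev = idx ? &keys[idx - 1] : NULL;  /* like node_iter prev */
        const int *cur  = idx < nr ? &keys[idx] : NULL; /* like node_iter peek */

        if (prev && *prev >= pos)
                return 0;       /* iterator should be before this key */
        if (cur && *cur < pos)
                return 0;       /* iterator should be after this key */
        return 1;
}

int main(void)
{
        const int keys[] = { 10, 20, 30 };

        assert( iter_position_valid(keys, 3, 1, 15));   /* 10 < 15 <= 20 */
        assert(!iter_position_valid(keys, 3, 2, 15));   /* skipped past 20 */
        assert(!iter_position_valid(keys, 3, 0, 15));   /* still before 10 */
        return 0;
}
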
static void bch2_btree_path_verify(struct btree_trans *trans,
                                   struct btree_path *path)
{
        struct bch_fs *c = trans->c;

        for (unsigned i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
                if (!path->l[i].b) {
                        BUG_ON(!path->cached &&
                               bch2_btree_id_root(c, path->btree_id)->b->c.level > i);
                        break;
                }

                bch2_btree_path_verify_level(trans, path, i);
        }

        bch2_btree_path_verify_locks(path);
}

void bch2_trans_verify_paths(struct btree_trans *trans)
{
        struct btree_path *path;
        unsigned iter;

        trans_for_each_path(trans, path, iter)
                bch2_btree_path_verify(trans, path);
}

static void bch2_btree_iter_verify(struct btree_iter *iter)
{
        struct btree_trans *trans = iter->trans;

        BUG_ON(!!(iter->flags & BTREE_ITER_cached) != btree_iter_path(trans, iter)->cached);

        BUG_ON((iter->flags & BTREE_ITER_is_extents) &&
               (iter->flags & BTREE_ITER_all_snapshots));

        BUG_ON(!(iter->flags & BTREE_ITER_snapshot_field) &&
               (iter->flags & BTREE_ITER_all_snapshots) &&
               !btree_type_has_snapshot_field(iter->btree_id));

        if (iter->update_path)
                bch2_btree_path_verify(trans, &trans->paths[iter->update_path]);
        bch2_btree_path_verify(trans, btree_iter_path(trans, iter));
}

static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
{
        BUG_ON((iter->flags & BTREE_ITER_filter_snapshots) &&
               !iter->pos.snapshot);

        BUG_ON(!(iter->flags & BTREE_ITER_all_snapshots) &&
               iter->pos.snapshot != iter->snapshot);

        BUG_ON(bkey_lt(iter->pos, bkey_start_pos(&iter->k)) ||
               bkey_gt(iter->pos, iter->k.p));
}

static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
{
        struct btree_trans *trans = iter->trans;
        struct btree_iter copy;
        struct bkey_s_c prev;
        int ret = 0;

        if (!bch2_debug_check_iterators)
                return 0;

        if (!(iter->flags & BTREE_ITER_filter_snapshots))
                return 0;

        if (bkey_err(k) || !k.k)
                return 0;

        BUG_ON(!bch2_snapshot_is_ancestor(trans->c,
                                          iter->snapshot,
                                          k.k->p.snapshot));

        bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
                             BTREE_ITER_nopreserve|
                             BTREE_ITER_all_snapshots);
        prev = bch2_btree_iter_prev(&copy);
        if (!prev.k)
                goto out;

        ret = bkey_err(prev);
        if (ret)
                goto out;

        if (bkey_eq(prev.k->p, k.k->p) &&
            bch2_snapshot_is_ancestor(trans->c, iter->snapshot,
                                      prev.k->p.snapshot) > 0) {
                struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;

                bch2_bkey_to_text(&buf1, k.k);
                bch2_bkey_to_text(&buf2, prev.k);

                panic("iter snap %u\n"
                      "k    %s\n"
                      "prev %s\n",
                      iter->snapshot,
                      buf1.buf, buf2.buf);
        }
out:
        bch2_trans_iter_exit(trans, &copy);
        return ret;
}

void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
                            struct bpos pos)
{
        bch2_trans_verify_not_unlocked(trans);

        struct btree_path *path;
        struct trans_for_each_path_inorder_iter iter;
        struct printbuf buf = PRINTBUF;

        btree_trans_sort_paths(trans);

        trans_for_each_path_inorder(trans, path, iter) {
                if (path->btree_id != id ||
                    !btree_node_locked(path, 0) ||
                    !path->should_be_locked)
                        continue;

                if (!path->cached) {
                        if (bkey_ge(pos, path->l[0].b->data->min_key) &&
                            bkey_le(pos, path->l[0].b->key.k.p))
                                return;
                } else {
                        if (bkey_eq(pos, path->pos))
                                return;
                }
        }

        bch2_dump_trans_paths_updates(trans);
        bch2_bpos_to_text(&buf, pos);

        panic("not locked: %s %s\n", bch2_btree_id_str(id), buf.buf);
}

#else

static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
                                                struct btree_path *path, unsigned l) {}
static inline void bch2_btree_path_verify(struct btree_trans *trans,
                                          struct btree_path *path) {}
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }

#endif

/* Btree path: fixups after btree updates */

static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
                                        struct btree *b,
                                        struct bset_tree *t,
                                        struct bkey_packed *k)
{
        struct btree_node_iter_set *set;

        btree_node_iter_for_each(iter, set)
                if (set->end == t->end_offset) {
                        set->k = __btree_node_key_to_offset(b, k);
                        bch2_btree_node_iter_sort(iter, b);
                        return;
                }

        bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
}

static void __bch2_btree_path_fix_key_modified(struct btree_path *path,
                                               struct btree *b,
                                               struct bkey_packed *where)
{
        struct btree_path_level *l = &path->l[b->c.level];

        if (where != bch2_btree_node_iter_peek_all(&l->iter, l->b))
                return;

        if (bkey_iter_pos_cmp(l->b, where, &path->pos) < 0)
                bch2_btree_node_iter_advance(&l->iter, l->b);
}

void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
                                      struct btree *b,
                                      struct bkey_packed *where)
{
        struct btree_path *path;
        unsigned i;

        trans_for_each_path_with_node(trans, b, path, i) {
                __bch2_btree_path_fix_key_modified(path, b, where);
                bch2_btree_path_verify_level(trans, path, b->c.level);
        }
}

static void __bch2_btree_node_iter_fix(struct btree_path *path,
                                       struct btree *b,
                                       struct btree_node_iter *node_iter,
                                       struct bset_tree *t,
                                       struct bkey_packed *where,
                                       unsigned clobber_u64s,
                                       unsigned new_u64s)
{
        const struct bkey_packed *end = btree_bkey_last(b, t);
        struct btree_node_iter_set *set;
        unsigned offset = __btree_node_key_to_offset(b, where);
        int shift = new_u64s - clobber_u64s;
        unsigned old_end = t->end_offset - shift;
        unsigned orig_iter_pos = node_iter->data[0].k;
        bool iter_current_key_modified =
                orig_iter_pos >= offset &&
                orig_iter_pos <= offset + clobber_u64s;

        btree_node_iter_for_each(node_iter, set)
                if (set->end == old_end)
                        goto found;

        /* didn't find the bset in the iterator - might have to readd it: */
        if (new_u64s &&
            bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
                bch2_btree_node_iter_push(node_iter, b, where, end);
                goto fixup_done;
        } else {
                /* Iterator is after key that changed */
                return;
        }
found:
        set->end = t->end_offset;

        /* Iterator hasn't gotten to the key that changed yet: */
        if (set->k < offset)
                return;

        if (new_u64s &&
            bkey_iter_pos_cmp(b, where, &path->pos) >= 0) {
                set->k = offset;
        } else if (set->k < offset + clobber_u64s) {
                set->k = offset + new_u64s;
                if (set->k == set->end)
                        bch2_btree_node_iter_set_drop(node_iter, set);
        } else {
                /* Iterator is after key that changed */
                set->k = (int) set->k + shift;
                return;
        }

        bch2_btree_node_iter_sort(node_iter, b);
fixup_done:
        if (node_iter->data[0].k != orig_iter_pos)
                iter_current_key_modified = true;

        /*
         * When a new key is added, and the node iterator now points to that
         * key, the iterator might have skipped past deleted keys that should
         * come after the key the iterator now points to. We have to rewind to
         * before those deleted keys - otherwise
         * bch2_btree_node_iter_prev_all() breaks:
         */
        if (!bch2_btree_node_iter_end(node_iter) &&
            iter_current_key_modified &&
            b->c.level) {
                struct bkey_packed *k, *k2, *p;

                k = bch2_btree_node_iter_peek_all(node_iter, b);

                for_each_bset(b, t) {
                        bool set_pos = false;

                        if (node_iter->data[0].end == t->end_offset)
                                continue;

                        k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);

                        while ((p = bch2_bkey_prev_all(b, t, k2)) &&
                               bkey_iter_cmp(b, k, p) < 0) {
                                k2 = p;
                                set_pos = true;
                        }

                        if (set_pos)
                                btree_node_iter_set_set_pos(node_iter,
                                                            b, t, k2);
                }
        }
}

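Most of the bookkeeping above reduces to shifting stored bset offsets when an edit at 'offset' replaces clobber_u64s worth of key with new_u64s. Here is a standalone sketch of just that arithmetic; adjust_iter_offset() is a hypothetical helper for illustration, and the branch where the iterator re-points at the new key itself (the bkey_iter_pos_cmp() case above) is left out.

#include <assert.h>

/* Hypothetical model of the offset fixup in __bch2_btree_node_iter_fix():
 * an edit at 'offset' replaces 'clobber_u64s' u64s with 'new_u64s'.  An
 * iterator offset before the edit is untouched, one inside the clobbered
 * range snaps to just past the new key, and one after the edit shifts by
 * (new_u64s - clobber_u64s). */
static unsigned adjust_iter_offset(unsigned iter_k, unsigned offset,
                                   unsigned clobber_u64s, unsigned new_u64s)
{
        int shift = (int) new_u64s - (int) clobber_u64s;

        if (iter_k < offset)
                return iter_k;                          /* not reached yet */
        if (iter_k < offset + clobber_u64s)
                return offset + new_u64s;               /* was inside the old key */
        return (unsigned) ((int) iter_k + shift);       /* after the edit */
}

int main(void)
{
        /* replace 3 u64s at offset 8 with 5 u64s */
        assert(adjust_iter_offset( 4, 8, 3, 5) ==  4);
        assert(adjust_iter_offset( 9, 8, 3, 5) == 13);
        assert(adjust_iter_offset(20, 8, 3, 5) == 22);
        return 0;
}
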
void bch2_btree_node_iter_fix(struct btree_trans *trans,
                              struct btree_path *path,
                              struct btree *b,
                              struct btree_node_iter *node_iter,
                              struct bkey_packed *where,
                              unsigned clobber_u64s,
                              unsigned new_u64s)
{
        struct bset_tree *t = bch2_bkey_to_bset_inlined(b, where);
        struct btree_path *linked;
        unsigned i;

        if (node_iter != &path->l[b->c.level].iter) {
                __bch2_btree_node_iter_fix(path, b, node_iter, t,
                                           where, clobber_u64s, new_u64s);

                if (bch2_debug_check_iterators)
                        bch2_btree_node_iter_verify(node_iter, b);
        }

        trans_for_each_path_with_node(trans, b, linked, i) {
                __bch2_btree_node_iter_fix(linked, b,
                                           &linked->l[b->c.level].iter, t,
                                           where, clobber_u64s, new_u64s);
                bch2_btree_path_verify_level(trans, linked, b->c.level);
        }
}

536 /* Btree path level: pointer to a particular b    536 /* Btree path level: pointer to a particular btree node and node iter */
537                                                   537 
538 static inline struct bkey_s_c __btree_iter_unp    538 static inline struct bkey_s_c __btree_iter_unpack(struct bch_fs *c,
539                                                   539                                                   struct btree_path_level *l,
540                                                   540                                                   struct bkey *u,
541                                                   541                                                   struct bkey_packed *k)
542 {                                                 542 {
543         if (unlikely(!k)) {                       543         if (unlikely(!k)) {
544                 /*                                544                 /*
545                  * signal to bch2_btree_iter_p    545                  * signal to bch2_btree_iter_peek_slot() that we're currently at
546                  * a hole                         546                  * a hole
547                  */                               547                  */
548                 u->type = KEY_TYPE_deleted;       548                 u->type = KEY_TYPE_deleted;
549                 return bkey_s_c_null;             549                 return bkey_s_c_null;
550         }                                         550         }
551                                                   551 
552         return bkey_disassemble(l->b, k, u);      552         return bkey_disassemble(l->b, k, u);
553 }                                                 553 }
554                                                   554 
555 static inline struct bkey_s_c btree_path_level    555 static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
556                                                   556                                                         struct btree_path_level *l,
557                                                   557                                                         struct bkey *u)
558 {                                                 558 {
559         return __btree_iter_unpack(c, l, u,       559         return __btree_iter_unpack(c, l, u,
560                         bch2_btree_node_iter_p    560                         bch2_btree_node_iter_peek_all(&l->iter, l->b));
561 }                                                 561 }
562                                                   562 
563 static inline struct bkey_s_c btree_path_level    563 static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
564                                                   564                                                     struct btree_path *path,
565                                                   565                                                     struct btree_path_level *l,
566                                                   566                                                     struct bkey *u)
567 {                                                 567 {
568         struct bkey_s_c k = __btree_iter_unpac    568         struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
569                         bch2_btree_node_iter_p    569                         bch2_btree_node_iter_peek(&l->iter, l->b));
570                                                   570 
571         path->pos = k.k ? k.k->p : l->b->key.k    571         path->pos = k.k ? k.k->p : l->b->key.k.p;
572         trans->paths_sorted = false;              572         trans->paths_sorted = false;
573         bch2_btree_path_verify_level(trans, pa    573         bch2_btree_path_verify_level(trans, path, l - path->l);
574         return k;                                 574         return k;
575 }                                                 575 }
576                                                   576 
577 static inline struct bkey_s_c btree_path_level    577 static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
578                                                   578                                                     struct btree_path *path,
579                                                   579                                                     struct btree_path_level *l,
580                                                   580                                                     struct bkey *u)
581 {                                                 581 {
582         struct bkey_s_c k = __btree_iter_unpac    582         struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
583                         bch2_btree_node_iter_p    583                         bch2_btree_node_iter_prev(&l->iter, l->b));
584                                                   584 
585         path->pos = k.k ? k.k->p : l->b->data-    585         path->pos = k.k ? k.k->p : l->b->data->min_key;
586         trans->paths_sorted = false;              586         trans->paths_sorted = false;
587         bch2_btree_path_verify_level(trans, pa    587         bch2_btree_path_verify_level(trans, path, l - path->l);
588         return k;                                 588         return k;
589 }                                                 589 }
590                                                   590 
static inline bool btree_path_advance_to_pos(struct btree_path *path,
                                             struct btree_path_level *l,
                                             int max_advance)
{
        struct bkey_packed *k;
        int nr_advanced = 0;

        while ((k = bch2_btree_node_iter_peek_all(&l->iter, l->b)) &&
               bkey_iter_pos_cmp(l->b, k, &path->pos) < 0) {
                if (max_advance > 0 && nr_advanced >= max_advance)
                        return false;

                bch2_btree_node_iter_advance(&l->iter, l->b);
                nr_advanced++;
        }

        return true;
}

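/*
 * (Re)initialize the node iterator for one level of a path, seeking it to
 * path->pos within that level's cached btree node.
 */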
static inline void __btree_path_level_init(struct btree_path *path,
                                           unsigned level)
{
        struct btree_path_level *l = &path->l[level];

        bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);

        /*
         * Iterators to interior nodes should always be pointed at the first non
         * whiteout:
         */
        if (level)
                bch2_btree_node_iter_peek(&l->iter, l->b);
}

void bch2_btree_path_level_init(struct btree_trans *trans,
                                struct btree_path *path,
                                struct btree *b)
{
        BUG_ON(path->cached);

        EBUG_ON(!btree_path_pos_in_node(path, b));

        path->l[b->c.level].lock_seq = six_lock_seq(&b->c.lock);
        path->l[b->c.level].b = b;
        __btree_path_level_init(path, b->c.level);
}

/* Btree path: fixups after btree node updates: */

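/*
 * After a btree node covered by pending updates has been replaced or
 * reinitialized, re-read the old key/value that each update in that node will
 * overwrite; while journal replay is still in progress the journal's version
 * of the key takes precedence.
 */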
static void bch2_trans_revalidate_updates_in_node(struct btree_trans *trans, struct btree *b)
{
        struct bch_fs *c = trans->c;

        trans_for_each_update(trans, i)
                if (!i->cached &&
                    i->level    == b->c.level &&
                    i->btree_id == b->c.btree_id &&
                    bpos_cmp(i->k->k.p, b->data->min_key) >= 0 &&
                    bpos_cmp(i->k->k.p, b->data->max_key) <= 0) {
                        i->old_v = bch2_btree_path_peek_slot(trans->paths + i->path, &i->old_k).v;

                        if (unlikely(trans->journal_replay_not_finished)) {
                                struct bkey_i *j_k =
                                        bch2_journal_keys_peek_slot(c, i->btree_id, i->level,
                                                                    i->k->k.p);

                                if (j_k) {
                                        i->old_k = j_k->k;
                                        i->old_v = &j_k->v;
                                }
                        }
                }
}

/*
 * A btree node is being replaced - update the iterator to point to the new
 * node:
 */
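/*
 * Lock state carries over rather than being retaken: for each affected path we
 * drop the lock held on the old node at this level, take an equivalent count
 * on the new node's lock with six_lock_increment(), and then re-point the
 * path's level at the new node.
 */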
void bch2_trans_node_add(struct btree_trans *trans,
                         struct btree_path *path,
                         struct btree *b)
{
        struct btree_path *prev;

        BUG_ON(!btree_path_pos_in_node(path, b));

        while ((prev = prev_btree_path(trans, path)) &&
               btree_path_pos_in_node(prev, b))
                path = prev;

        for (;
             path && btree_path_pos_in_node(path, b);
             path = next_btree_path(trans, path))
                if (path->uptodate == BTREE_ITER_UPTODATE && !path->cached) {
                        enum btree_node_locked_type t =
                                btree_lock_want(path, b->c.level);

                        if (t != BTREE_NODE_UNLOCKED) {
                                btree_node_unlock(trans, path, b->c.level);
                                six_lock_increment(&b->c.lock, (enum six_lock_type) t);
                                mark_btree_node_locked(trans, path, b->c.level, t);
                        }

                        bch2_btree_path_level_init(trans, path, b);
                }

        bch2_trans_revalidate_updates_in_node(trans, b);
}

/*
 * A btree node has been modified in such a way as to invalidate iterators - fix
 * them:
 */
void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
{
        struct btree_path *path;
        unsigned i;

        trans_for_each_path_with_node(trans, b, path, i)
                __btree_path_level_init(path, b->c.level);

        bch2_trans_revalidate_updates_in_node(trans, b);
}

/* Btree path: traverse, set_pos: */

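/*
 * Lock the root of the btree this path points at: re-read the root pointer,
 * take the lock we want at the root's level, then check that the root didn't
 * get replaced while we were blocked on the lock - if it did, drop the lock
 * and retry. Returns 1 (without locking anything) if the tree is shallower
 * than depth_want.
 */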
static inline int btree_path_lock_root(struct btree_trans *trans,
                                       struct btree_path *path,
                                       unsigned depth_want,
                                       unsigned long trace_ip)
{
        struct bch_fs *c = trans->c;
        struct btree *b, **rootp = &bch2_btree_id_root(c, path->btree_id)->b;
        enum six_lock_type lock_type;
        unsigned i;
        int ret;

        EBUG_ON(path->nodes_locked);

        while (1) {
                b = READ_ONCE(*rootp);
                path->level = READ_ONCE(b->c.level);

                if (unlikely(path->level < depth_want)) {
                        /*
                         * the root is at a lower depth than the depth we want:
                         * got to the end of the btree, or we're walking nodes
                         * greater than some depth and there are no nodes >=
                         * that depth
                         */
                        path->level = depth_want;
                        for (i = path->level; i < BTREE_MAX_DEPTH; i++)
                                path->l[i].b = NULL;
                        return 1;
                }

                lock_type = __btree_lock_want(path, path->level);
                ret = btree_node_lock(trans, path, &b->c,
                                      path->level, lock_type, trace_ip);
                if (unlikely(ret)) {
                        if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed))
                                continue;
                        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                                return ret;
                        BUG();
                }

                if (likely(b == READ_ONCE(*rootp) &&
                           b->c.level == path->level &&
                           !race_fault())) {
                        for (i = 0; i < path->level; i++)
                                path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_lock_root);
                        path->l[path->level].b = b;
                        for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
                                path->l[i].b = NULL;

                        mark_btree_node_locked(trans, path, path->level,
                                               (enum btree_node_locked_type) lock_type);
                        bch2_btree_path_level_init(trans, path, b);
                        return 0;
                }

                six_unlock_type(&b->c.lock, lock_type);
        }
}

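/*
 * Opportunistically kick off reads for the next few child nodes we're likely
 * to descend into: walk a copy of the parent's node iterator forwards and
 * issue a prefetch for each child pointer found, as long as the parent can
 * still be relocked. The batch size is largest (up to 16 nodes) before
 * BCH_FS_started is set and much smaller afterwards.
 */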
noinline
static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *path)
{
        struct bch_fs *c = trans->c;
        struct btree_path_level *l = path_l(path);
        struct btree_node_iter node_iter = l->iter;
        struct bkey_packed *k;
        struct bkey_buf tmp;
        unsigned nr = test_bit(BCH_FS_started, &c->flags)
                ? (path->level > 1 ? 0 :  2)
                : (path->level > 1 ? 1 : 16);
        bool was_locked = btree_node_locked(path, path->level);
        int ret = 0;

        bch2_bkey_buf_init(&tmp);

        while (nr-- && !ret) {
                if (!bch2_btree_node_relock(trans, path, path->level))
                        break;

                bch2_btree_node_iter_advance(&node_iter, l->b);
                k = bch2_btree_node_iter_peek(&node_iter, l->b);
                if (!k)
                        break;

                bch2_bkey_buf_unpack(&tmp, c, l->b, k);
                ret = bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id,
                                               path->level - 1);
        }

        if (!was_locked)
                btree_node_unlock(trans, path, path->level);

        bch2_bkey_buf_exit(&tmp, c);
        return ret;
}

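/*
 * Same idea as btree_path_prefetch(), but iterating the combined node/journal
 * view - used while journal replay hasn't finished, when some keys may still
 * only exist in the journal.
 */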
static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
                                 struct btree_and_journal_iter *jiter)
{
        struct bch_fs *c = trans->c;
        struct bkey_s_c k;
        struct bkey_buf tmp;
        unsigned nr = test_bit(BCH_FS_started, &c->flags)
                ? (path->level > 1 ? 0 :  2)
                : (path->level > 1 ? 1 : 16);
        bool was_locked = btree_node_locked(path, path->level);
        int ret = 0;

        bch2_bkey_buf_init(&tmp);

        while (nr-- && !ret) {
                if (!bch2_btree_node_relock(trans, path, path->level))
                        break;

                bch2_btree_and_journal_iter_advance(jiter);
                k = bch2_btree_and_journal_iter_peek(jiter);
                if (!k.k)
                        break;

                bch2_bkey_buf_reassemble(&tmp, c, k);
                ret = bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id,
                                               path->level - 1);
        }

        if (!was_locked)
                btree_node_unlock(trans, path, path->level);

        bch2_bkey_buf_exit(&tmp, c);
        return ret;
}

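/*
 * Stash the child node's in-memory address in the parent's btree_ptr_v2 key
 * (bp->mem_ptr), so future traversals through this parent can find the node
 * without going through the btree node cache hash table. Skipped if the
 * parent can't be relocked without blocking.
 */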
static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
                                            struct btree_path *path,
                                            unsigned plevel, struct btree *b)
{
        struct btree_path_level *l = &path->l[plevel];
        bool locked = btree_node_locked(path, plevel);
        struct bkey_packed *k;
        struct bch_btree_ptr_v2 *bp;

        if (!bch2_btree_node_relock(trans, path, plevel))
                return;

        k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
        BUG_ON(k->type != KEY_TYPE_btree_ptr_v2);

        bp = (void *) bkeyp_val(&l->b->format, k);
        bp->mem_ptr = (unsigned long)b;

        if (!locked)
                btree_node_unlock(trans, path, plevel);
}

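/*
 * Child-pointer lookup used while journal replay is still in flight: peek the
 * next key from the combined node/journal iterator (journal keys take
 * precedence), copy it into *out, and optionally prefetch further siblings.
 * Finding no key at all here means the tree topology is inconsistent.
 */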
static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
                                                     struct btree_path *path,
                                                     unsigned flags,
                                                     struct bkey_buf *out)
{
        struct bch_fs *c = trans->c;
        struct btree_path_level *l = path_l(path);
        struct btree_and_journal_iter jiter;
        struct bkey_s_c k;
        int ret = 0;

        __bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos);

        k = bch2_btree_and_journal_iter_peek(&jiter);
        if (!k.k) {
                struct printbuf buf = PRINTBUF;

                prt_str(&buf, "node not found at pos ");
                bch2_bpos_to_text(&buf, path->pos);
                prt_str(&buf, " at btree ");
                bch2_btree_pos_to_text(&buf, c, l->b);

                ret = bch2_fs_topology_error(c, "%s", buf.buf);
                printbuf_exit(&buf);
                goto err;
        }

        bch2_bkey_buf_reassemble(out, c, k);

        if ((flags & BTREE_ITER_prefetch) &&
            c->opts.btree_node_prefetch)
                ret = btree_path_prefetch_j(trans, path, &jiter);

err:
        bch2_btree_and_journal_iter_exit(&jiter);
        return ret;
}

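/*
 * Descend one level: look up the child pointer at path->pos in the node we
 * currently have locked, get and lock the child with the wanted lock type,
 * and make it the path's new bottom level. Along the way this fixes up the
 * parent's cached mem_ptr if the child was loaded at a different address, and
 * drops a read lock still held on the parent once the lookup in it is done.
 */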
static __always_inline int btree_path_down(struct btree_trans *trans,
                                           struct btree_path *path,
                                           unsigned flags,
                                           unsigned long trace_ip)
{
        struct bch_fs *c = trans->c;
        struct btree_path_level *l = path_l(path);
        struct btree *b;
        unsigned level = path->level - 1;
        enum six_lock_type lock_type = __btree_lock_want(path, level);
        struct bkey_buf tmp;
        int ret;

        EBUG_ON(!btree_node_locked(path, path->level));

        bch2_bkey_buf_init(&tmp);

        if (unlikely(trans->journal_replay_not_finished)) {
                ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
                if (ret)
                        goto err;
        } else {
                struct bkey_packed *k = bch2_btree_node_iter_peek(&l->iter, l->b);
                if (!k) {
                        struct printbuf buf = PRINTBUF;

                        prt_str(&buf, "node not found at pos ");
                        bch2_bpos_to_text(&buf, path->pos);
                        prt_str(&buf, " within parent node ");
                        bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&l->b->key));

                        bch2_fs_fatal_error(c, "%s", buf.buf);
                        printbuf_exit(&buf);
                        ret = -BCH_ERR_btree_need_topology_repair;
                        goto err;
                }

                bch2_bkey_buf_unpack(&tmp, c, l->b, k);

                if ((flags & BTREE_ITER_prefetch) &&
                    c->opts.btree_node_prefetch) {
                        ret = btree_path_prefetch(trans, path);
                        if (ret)
                                goto err;
                }
        }

        b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
        ret = PTR_ERR_OR_ZERO(b);
        if (unlikely(ret))
                goto err;

        if (likely(!trans->journal_replay_not_finished &&
                   tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
            unlikely(b != btree_node_mem_ptr(tmp.k)))
                btree_node_mem_ptr_set(trans, path, level + 1, b);

        if (btree_node_read_locked(path, level + 1))
                btree_node_unlock(trans, path, level + 1);

        mark_btree_node_locked(trans, path, level,
                               (enum btree_node_locked_type) lock_type);
        path->level = level;
        bch2_btree_path_level_init(trans, path, b);

        bch2_btree_path_verify_locks(path);
err:
        bch2_bkey_buf_exit(&tmp, c);
        return ret;
}

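/*
 * Slow path, taken after a transaction restart: drop every lock, then
 * re-traverse all paths in sorted order so locks are acquired in a consistent
 * order and transactions can't livelock against each other. If a btree node
 * allocation failed earlier, the cannibalize lock is taken first so this
 * traversal is guaranteed to get nodes out of the btree cache.
 *
 * For context, btree transactions as a whole are retried on restart with a
 * loop along these lines (sketch only; bch2_trans_begin() lives elsewhere in
 * bcachefs, and some_btree_operation() stands in for the caller's work):
 *
 *      do {
 *              bch2_trans_begin(trans);
 *              ret = some_btree_operation(trans);
 *      } while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
 */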
static int bch2_btree_path_traverse_all(struct btree_trans *trans)
{
        struct bch_fs *c = trans->c;
        struct btree_path *path;
        unsigned long trace_ip = _RET_IP_;
        unsigned i;
        int ret = 0;

        if (trans->in_traverse_all)
                return -BCH_ERR_transaction_restart_in_traverse_all;

        trans->in_traverse_all = true;
retry_all:
        trans->restarted = 0;
        trans->last_restarted_ip = 0;

        trans_for_each_path(trans, path, i)
                path->should_be_locked = false;

        btree_trans_sort_paths(trans);

        bch2_trans_unlock(trans);
        cond_resched();
        trans_set_locked(trans);

        if (unlikely(trans->memory_allocation_failure)) {
                struct closure cl;

                closure_init_stack(&cl);

                do {
                        ret = bch2_btree_cache_cannibalize_lock(trans, &cl);
                        closure_sync(&cl);
                } while (ret);
        }

        /* Now, redo traversals in correct order: */
        i = 0;
        while (i < trans->nr_sorted) {
                btree_path_idx_t idx = trans->sorted[i];

                /*
                 * Traversing a path can cause another path to be added at about
                 * the same position:
                 */
                if (trans->paths[idx].uptodate) {
                        __btree_path_get(trans, &trans->paths[idx], false);
                        ret = bch2_btree_path_traverse_one(trans, idx, 0, _THIS_IP_);
                        __btree_path_put(trans, &trans->paths[idx], false);

                        if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
                            bch2_err_matches(ret, ENOMEM))
                                goto retry_all;
                        if (ret)
                                goto err;
                } else {
                        i++;
                }
        }

        /*
         * We used to assert that all paths had been traversed here
         * (path->uptodate < BTREE_ITER_NEED_TRAVERSE); however, since
         * path->should_be_locked is not set yet, we might have unlocked and
         * then failed to relock a path - that's fine.
         */
err:
        bch2_btree_cache_cannibalize_unlock(trans);

        trans->in_traverse_all = false;

        trace_and_count(c, trans_traverse_all, trans, trace_ip);
        return ret;
}

static inline bool btree_path_check_pos_in_node(struct btree_path *path,
                                                unsigned l, int check_pos)
{
        if (check_pos < 0 && btree_path_pos_before_node(path, path->l[l].b))
                return false;
        if (check_pos > 0 && btree_path_pos_after_node(path, path->l[l].b))
                return false;
        return true;
}

static inline bool btree_path_good_node(struct btree_trans *trans,
                                        struct btree_path *path,
                                        unsigned l, int check_pos)
{
        return is_btree_node(path, l) &&
                bch2_btree_node_relock(trans, path, l) &&
                btree_path_check_pos_in_node(path, l, check_pos);
}

static void btree_path_set_level_down(struct btree_trans *trans,
                                      struct btree_path *path,
                                      unsigned new_level)
{
        unsigned l;

        path->level = new_level;

        for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
                if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
                        btree_node_unlock(trans, path, l);

        btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
        bch2_btree_path_verify(trans, path);
}

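/*
 * Walk up the path, discarding levels whose nodes are gone, can't be relocked,
 * or no longer contain path->pos (per check_pos), until we reach a usable
 * node; then make sure the levels above it that locks_want covers are locked
 * too, going back around if one of those can't be relocked. Returns the level
 * we ended up at.
 */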
1090 static noinline unsigned __btree_path_up_unti    1090 static noinline unsigned __btree_path_up_until_good_node(struct btree_trans *trans,
1091                                                  1091                                                          struct btree_path *path,
1092                                                  1092                                                          int check_pos)
1093 {                                                1093 {
1094         unsigned i, l = path->level;             1094         unsigned i, l = path->level;
1095 again:                                           1095 again:
1096         while (btree_path_node(path, l) &&       1096         while (btree_path_node(path, l) &&
1097                !btree_path_good_node(trans, p    1097                !btree_path_good_node(trans, path, l, check_pos))
1098                 __btree_path_set_level_up(tra    1098                 __btree_path_set_level_up(trans, path, l++);
1099                                                  1099 
1100         /* If we need intent locks, take them    1100         /* If we need intent locks, take them too: */
1101         for (i = l + 1;                          1101         for (i = l + 1;
1102              i < path->locks_want && btree_pa    1102              i < path->locks_want && btree_path_node(path, i);
1103              i++)                                1103              i++)
1104                 if (!bch2_btree_node_relock(t    1104                 if (!bch2_btree_node_relock(trans, path, i)) {
1105                         while (l <= i)           1105                         while (l <= i)
1106                                 __btree_path_    1106                                 __btree_path_set_level_up(trans, path, l++);
1107                         goto again;              1107                         goto again;
1108                 }                                1108                 }
1109                                                  1109 
1110         return l;                                1110         return l;
1111 }                                                1111 }
1112                                                  1112 
1113 static inline unsigned btree_path_up_until_go    1113 static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
1114                                                  1114                                                      struct btree_path *path,
1115                                                  1115                                                      int check_pos)
1116 {                                                1116 {
1117         return likely(btree_node_locked(path,    1117         return likely(btree_node_locked(path, path->level) &&
1118                       btree_path_check_pos_in    1118                       btree_path_check_pos_in_node(path, path->level, check_pos))
1119                 ? path->level                    1119                 ? path->level
1120                 : __btree_path_up_until_good_    1120                 : __btree_path_up_until_good_node(trans, path, check_pos);
1121 }                                                1121 }
1122                                                  1122 
1123 /*                                               1123 /*
1124  * This is the main state machine for walking    1124  * This is the main state machine for walking down the btree - walks down to a
1125  * specified depth                               1125  * specified depth
1126  *                                               1126  *
1127  * Returns 0 on success, -EIO on error (error    1127  * Returns 0 on success, -EIO on error (error reading in a btree node).
1128  *                                               1128  *
1129  * On error, caller (peek_node()/peek_key())     1129  * On error, caller (peek_node()/peek_key()) must return NULL; the error is
1130  * stashed in the iterator and returned from     1130  * stashed in the iterator and returned from bch2_trans_exit().
1131  */                                              1131  */
1132 int bch2_btree_path_traverse_one(struct btree    1132 int bch2_btree_path_traverse_one(struct btree_trans *trans,
1133                                  btree_path_i    1133                                  btree_path_idx_t path_idx,
1134                                  unsigned fla    1134                                  unsigned flags,
1135                                  unsigned lon    1135                                  unsigned long trace_ip)
1136 {                                                1136 {
1137         struct btree_path *path = &trans->pat    1137         struct btree_path *path = &trans->paths[path_idx];
1138         unsigned depth_want = path->level;       1138         unsigned depth_want = path->level;
1139         int ret = -((int) trans->restarted);     1139         int ret = -((int) trans->restarted);
1140                                                  1140 
1141         if (unlikely(ret))                       1141         if (unlikely(ret))
1142                 goto out;                        1142                 goto out;
1143                                                  1143 
1144         if (unlikely(!trans->srcu_held))         1144         if (unlikely(!trans->srcu_held))
1145                 bch2_trans_srcu_lock(trans);     1145                 bch2_trans_srcu_lock(trans);
1146                                                  1146 
1147         trace_btree_path_traverse_start(trans    1147         trace_btree_path_traverse_start(trans, path);
1148                                                  1148 
1149         /*                                       1149         /*
1150          * Ensure we obey path->should_be_loc    1150          * Ensure we obey path->should_be_locked: if it's set, we can't unlock
1151          * and re-traverse the path without a    1151          * and re-traverse the path without a transaction restart:
1152          */                                      1152          */
1153         if (path->should_be_locked) {            1153         if (path->should_be_locked) {
1154                 ret = bch2_btree_path_relock(    1154                 ret = bch2_btree_path_relock(trans, path, trace_ip);
1155                 goto out;                        1155                 goto out;
1156         }                                        1156         }
1157                                                  1157 
1158         if (path->cached) {                      1158         if (path->cached) {
1159                 ret = bch2_btree_path_travers    1159                 ret = bch2_btree_path_traverse_cached(trans, path, flags);
1160                 goto out;                        1160                 goto out;
1161         }                                        1161         }
1162                                                  1162 
1163         path = &trans->paths[path_idx];          1163         path = &trans->paths[path_idx];
1164                                                  1164 
1165         if (unlikely(path->level >= BTREE_MAX    1165         if (unlikely(path->level >= BTREE_MAX_DEPTH))
1166                 goto out_uptodate;               1166                 goto out_uptodate;
1167                                                  1167 
1168         path->level = btree_path_up_until_goo    1168         path->level = btree_path_up_until_good_node(trans, path, 0);
        unsigned max_level = path->level;

        EBUG_ON(btree_path_node(path, path->level) &&
                !btree_node_locked(path, path->level));

        /*
         * Note: path->nodes[path->level] may be temporarily NULL here - that
         * would indicate to other code that we got to the end of the btree,
         * here it indicates that relocking the root failed - it's critical that
         * btree_path_lock_root() comes next and that it can't fail
         */
        while (path->level > depth_want) {
                ret = btree_path_node(path, path->level)
                        ? btree_path_down(trans, path, flags, trace_ip)
                        : btree_path_lock_root(trans, path, depth_want, trace_ip);
                if (unlikely(ret)) {
                        if (ret == 1) {
                                /*
                                 * No nodes at this level - got to the end of
                                 * the btree:
                                 */
                                ret = 0;
                                goto out;
                        }

                        __bch2_btree_path_unlock(trans, path);
                        path->level = depth_want;
                        path->l[path->level].b = ERR_PTR(ret);
                        goto out;
                }
        }

        if (unlikely(max_level > path->level)) {
                struct btree_path *linked;
                unsigned iter;

                trans_for_each_path_with_node(trans, path_l(path)->b, linked, iter)
                        for (unsigned j = path->level + 1; j < max_level; j++)
                                linked->l[j] = path->l[j];
        }

out_uptodate:
        path->uptodate = BTREE_ITER_UPTODATE;
        trace_btree_path_traverse_end(trans, path);
out:
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted)
                panic("ret %s (%i) trans->restarted %s (%i)\n",
                      bch2_err_str(ret), ret,
                      bch2_err_str(trans->restarted), trans->restarted);
        bch2_btree_path_verify(trans, path);
        return ret;
}
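
/*
 * Note: the check at out: above keeps restart errors and trans->restarted in
 * lockstep, so callers can branch on
 * bch2_err_matches(ret, BCH_ERR_transaction_restart) alone without also
 * inspecting trans->restarted.
 */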

static inline void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
                            struct btree_path *src)
{
        unsigned i, offset = offsetof(struct btree_path, pos);

        memcpy((void *) dst + offset,
               (void *) src + offset,
               sizeof(struct btree_path) - offset);

        for (i = 0; i < BTREE_MAX_DEPTH; i++) {
                unsigned t = btree_node_locked_type(dst, i);

                if (t != BTREE_NODE_UNLOCKED)
                        six_lock_increment(&dst->l[i].b->c.lock, t);
        }
}
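
/*
 * Note: btree_path_copy() duplicates the source path's record of held node
 * locks, so for every locked level it bumps the corresponding six lock's
 * count with six_lock_increment() - the copy then owns its own reference on
 * each lock and can drop it independently of the original path.
 */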

static btree_path_idx_t btree_path_clone(struct btree_trans *trans, btree_path_idx_t src,
                                         bool intent, unsigned long ip)
{
        btree_path_idx_t new = btree_path_alloc(trans, src);
        btree_path_copy(trans, trans->paths + new, trans->paths + src);
        __btree_path_get(trans, trans->paths + new, intent);
#ifdef TRACK_PATH_ALLOCATED
        trans->paths[new].ip_allocated = ip;
#endif
        return new;
}

__flatten
btree_path_idx_t __bch2_btree_path_make_mut(struct btree_trans *trans,
                        btree_path_idx_t path, bool intent, unsigned long ip)
{
        struct btree_path *old = trans->paths + path;
        __btree_path_put(trans, trans->paths + path, intent);
        path = btree_path_clone(trans, path, intent, ip);
        trace_btree_path_clone(trans, old, trans->paths + path);
        trans->paths[path].preserve = false;
        return path;
}
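
/*
 * Note: __bch2_btree_path_make_mut() is the copy-on-write step for paths: the
 * caller's reference moves from the shared path to a fresh clone (with lock
 * counts bumped via btree_path_copy()), and the clone has preserve cleared so
 * it isn't kept around once the caller drops it.
 *
 * A minimal sketch of the idiom (hypothetical helper, not part of this file),
 * assuming path_idx is held by the caller with an intent reference:
 */
static btree_path_idx_t __maybe_unused example_own_path(struct btree_trans *trans,
                                                        btree_path_idx_t path_idx)
{
        /*
         * Returns a path index the caller may mutate; if the path was shared,
         * this is a clone and the other users' path is left untouched.
         */
        return bch2_btree_path_make_mut(trans, path_idx, true, _RET_IP_);
}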

btree_path_idx_t __must_check
__bch2_btree_path_set_pos(struct btree_trans *trans,
                          btree_path_idx_t path_idx, struct bpos new_pos,
                          bool intent, unsigned long ip)
{
        int cmp = bpos_cmp(new_pos, trans->paths[path_idx].pos);

        bch2_trans_verify_not_in_restart(trans);
        EBUG_ON(!trans->paths[path_idx].ref);

        trace_btree_path_set_pos(trans, trans->paths + path_idx, &new_pos);

        path_idx = bch2_btree_path_make_mut(trans, path_idx, intent, ip);

        struct btree_path *path = trans->paths + path_idx;
        path->pos               = new_pos;
        trans->paths_sorted     = false;

        if (unlikely(path->cached)) {
                btree_node_unlock(trans, path, 0);
                path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_up);
                btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
                goto out;
        }

        unsigned level = btree_path_up_until_good_node(trans, path, cmp);

        if (btree_path_node(path, level)) {
                struct btree_path_level *l = &path->l[level];

                BUG_ON(!btree_node_locked(path, level));
                /*
                 * We might have to skip over many keys, or just a few: try
                 * advancing the node iterator, and if we have to skip over too
                 * many keys just reinit it (or if we're rewinding, since that
                 * is expensive).
                 */
                if (cmp < 0 ||
                    !btree_path_advance_to_pos(path, l, 8))
                        bch2_btree_node_iter_init(&l->iter, l->b, &path->pos);

                /*
                 * Iterators to interior nodes should always be pointed at the first non
                 * whiteout:
                 */
                if (unlikely(level))
                        bch2_btree_node_iter_peek(&l->iter, l->b);
        }

        if (unlikely(level != path->level)) {
                btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
                __bch2_btree_path_unlock(trans, path);
        }
out:
        bch2_btree_path_verify(trans, path);
        return path_idx;
}
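
/*
 * A minimal repositioning sketch (hypothetical helper, not part of this
 * file), assuming path_idx refers to a path the caller holds a reference on:
 */
static btree_path_idx_t __maybe_unused example_set_pos(struct btree_trans *trans,
                                                       btree_path_idx_t path_idx,
                                                       struct bpos new_pos)
{
        /*
         * May clone the path if it is shared; node locks that are still good
         * for the new position are kept, and the path is marked as needing
         * traversal where they aren't.
         */
        return bch2_btree_path_set_pos(trans, path_idx, new_pos, false, _THIS_IP_);
}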

/* Btree path: main interface: */

static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
{
        struct btree_path *sib;

        sib = prev_btree_path(trans, path);
        if (sib && !btree_path_cmp(sib, path))
                return sib;

        sib = next_btree_path(trans, path);
        if (sib && !btree_path_cmp(sib, path))
                return sib;

        return NULL;
}

static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
{
        struct btree_path *sib;

        sib = prev_btree_path(trans, path);
        if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
                return sib;

        sib = next_btree_path(trans, path);
        if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
                return sib;

        return NULL;
}

static inline void __bch2_path_free(struct btree_trans *trans, btree_path_idx_t path)
{
        __bch2_btree_path_unlock(trans, trans->paths + path);
        btree_path_list_remove(trans, trans->paths + path);
        __clear_bit(path, trans->paths_allocated);
}

static bool bch2_btree_path_can_relock(struct btree_trans *trans, struct btree_path *path)
{
        unsigned l = path->level;

        do {
                if (!btree_path_node(path, l))
                        break;

                if (!is_btree_node(path, l))
                        return false;

                if (path->l[l].lock_seq != path->l[l].b->c.lock.seq)
                        return false;

                l++;
        } while (l < path->locks_want);

        return true;
}

void bch2_path_put(struct btree_trans *trans, btree_path_idx_t path_idx, bool intent)
{
        struct btree_path *path = trans->paths + path_idx, *dup;

        if (!__btree_path_put(trans, path, intent))
                return;

        dup = path->preserve
                ? have_path_at_pos(trans, path)
                : have_node_at_pos(trans, path);

        trace_btree_path_free(trans, path_idx, dup);

        if (!dup && !(!path->preserve && !is_btree_node(path, path->level)))
                return;

        if (path->should_be_locked && !trans->restarted) {
                if (!dup)
                        return;

                if (!(trans->locked
                      ? bch2_btree_path_relock_norestart(trans, dup)
                      : bch2_btree_path_can_relock(trans, dup)))
                        return;
        }

        if (dup) {
                dup->preserve           |= path->preserve;
                dup->should_be_locked   |= path->should_be_locked;
        }

        __bch2_path_free(trans, path_idx);
}
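
/*
 * Note on the dup logic above: when the last reference to a path is dropped,
 * its preserve and should_be_locked flags are handed off to a sibling path at
 * the same position (or on the same node) if one exists, so the position -
 * and, where required, the locks - survive even though this particular path
 * is freed.
 */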

static void bch2_path_put_nokeep(struct btree_trans *trans, btree_path_idx_t path,
                                 bool intent)
{
        if (!__btree_path_put(trans, trans->paths + path, intent))
                return;

        __bch2_path_free(trans, path);
}

void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
{
        panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
              trans->restart_count, restart_count,
              (void *) trans->last_begin_ip);
}

void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans)
{
        panic("in transaction restart: %s, last restarted by %pS\n",
              bch2_err_str(trans->restarted),
              (void *) trans->last_restarted_ip);
}

void __noreturn bch2_trans_unlocked_error(struct btree_trans *trans)
{
        panic("trans should be locked, unlocked by %pS\n",
              (void *) trans->last_unlock_ip);
}

noinline __cold
void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
{
        prt_printf(buf, "%u transaction updates for %s journal seq %llu\n",
                   trans->nr_updates, trans->fn, trans->journal_res.seq);
        printbuf_indent_add(buf, 2);

        trans_for_each_update(trans, i) {
                struct bkey_s_c old = { &i->old_k, i->old_v };

                prt_printf(buf, "update: btree=%s cached=%u %pS\n",
                       bch2_btree_id_str(i->btree_id),
                       i->cached,
                       (void *) i->ip_allocated);

                prt_printf(buf, "  old ");
                bch2_bkey_val_to_text(buf, trans->c, old);
                prt_newline(buf);

                prt_printf(buf, "  new ");
                bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
                prt_newline(buf);
        }

        for (struct jset_entry *e = trans->journal_entries;
             e != btree_trans_journal_entries_top(trans);
             e = vstruct_next(e))
                bch2_journal_entry_to_text(buf, trans->c, e);

        printbuf_indent_sub(buf, 2);
}

noinline __cold
void bch2_dump_trans_updates(struct btree_trans *trans)
{
        struct printbuf buf = PRINTBUF;

        bch2_trans_updates_to_text(&buf, trans);
        bch2_print_str(trans->c, buf.buf);
        printbuf_exit(&buf);
}

static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx)
{
        struct btree_path *path = trans->paths + path_idx;

        prt_printf(out, "path: idx %3u ref %u:%u %c %c %c btree=%s l=%u pos ",
                   path_idx, path->ref, path->intent_ref,
                   path->preserve ? 'P' : ' ',
                   path->should_be_locked ? 'S' : ' ',
                   path->cached ? 'C' : 'B',
                   bch2_btree_id_str(path->btree_id),
                   path->level);
        bch2_bpos_to_text(out, path->pos);

        if (!path->cached && btree_node_locked(path, path->level)) {
                prt_char(out, ' ');
                struct btree *b = path_l(path)->b;
                bch2_bpos_to_text(out, b->data->min_key);
                prt_char(out, '-');
                bch2_bpos_to_text(out, b->key.k.p);
        }

#ifdef TRACK_PATH_ALLOCATED
        prt_printf(out, " %pS", (void *) path->ip_allocated);
#endif
}

static const char *btree_node_locked_str(enum btree_node_locked_type t)
{
        switch (t) {
        case BTREE_NODE_UNLOCKED:
                return "unlocked";
        case BTREE_NODE_READ_LOCKED:
                return "read";
        case BTREE_NODE_INTENT_LOCKED:
                return "intent";
        case BTREE_NODE_WRITE_LOCKED:
                return "write";
        default:
                return NULL;
        }
}

void bch2_btree_path_to_text(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx)
{
        bch2_btree_path_to_text_short(out, trans, path_idx);

        struct btree_path *path = trans->paths + path_idx;

        prt_printf(out, " uptodate %u locks_want %u", path->uptodate, path->locks_want);
        prt_newline(out);

        printbuf_indent_add(out, 2);
        for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++) {
                prt_printf(out, "l=%u locks %s seq %u node ", l,
                           btree_node_locked_str(btree_node_locked_type(path, l)),
                           path->l[l].lock_seq);

                int ret = PTR_ERR_OR_ZERO(path->l[l].b);
                if (ret)
                        prt_str(out, bch2_err_str(ret));
                else
                        prt_printf(out, "%px", path->l[l].b);
                prt_newline(out);
        }
        printbuf_indent_sub(out, 2);
}
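
/*
 * A minimal sketch of dumping a single path to the log (hypothetical helper,
 * not part of this file), composing the helpers above with the usual printbuf
 * pattern used elsewhere in this file:
 */
static void __maybe_unused example_dump_one_path(struct btree_trans *trans,
                                                 btree_path_idx_t path_idx)
{
        struct printbuf buf = PRINTBUF;

        bch2_btree_path_to_text(&buf, trans, path_idx);
        bch2_print_str(trans->c, buf.buf);
        printbuf_exit(&buf);
}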

static noinline __cold
void __bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans,
                                bool nosort)
{
        struct trans_for_each_path_inorder_iter iter;

        if (!nosort)
                btree_trans_sort_paths(trans);

        trans_for_each_path_idx_inorder(trans, iter) {
                bch2_btree_path_to_text_short(out, trans, iter.path_idx);
                prt_newline(out);
        }
}

noinline __cold
void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
{
        __bch2_trans_paths_to_text(out, trans, false);
}

static noinline __cold
void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort)
{
        struct printbuf buf = PRINTBUF;

        __bch2_trans_paths_to_text(&buf, trans, nosort);
        bch2_trans_updates_to_text(&buf, trans);

        bch2_print_str(trans->c, buf.buf);
        printbuf_exit(&buf);
}

noinline __cold
void bch2_dump_trans_paths_updates(struct btree_trans *trans)
{
        __bch2_dump_trans_paths_updates(trans, false);
}

noinline __cold
static void bch2_trans_update_max_paths(struct btree_trans *trans)
{
        struct btree_transaction_stats *s = btree_trans_stats(trans);
        struct printbuf buf = PRINTBUF;
        size_t nr = bitmap_weight(trans->paths_allocated, trans->nr_paths);

        bch2_trans_paths_to_text(&buf, trans);

        if (!buf.allocation_failure) {
                mutex_lock(&s->lock);
                if (nr > s->nr_max_paths) {
                        s->nr_max_paths = nr;
                        swap(s->max_paths_text, buf.buf);
                }
                mutex_unlock(&s->lock);
        }

        printbuf_exit(&buf);

        trans->nr_paths_max = nr;
}

noinline __cold
int __bch2_btree_trans_too_many_iters(struct btree_trans *trans)
{
        if (trace_trans_restart_too_many_iters_enabled()) {
                struct printbuf buf = PRINTBUF;

                bch2_trans_paths_to_text(&buf, trans);
                trace_trans_restart_too_many_iters(trans, _THIS_IP_, buf.buf);
                printbuf_exit(&buf);
        }

        count_event(trans->c, trans_restart_too_many_iters);

        return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
}
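
/*
 * A minimal sketch of how the restart above is typically requested
 * (hypothetical helper and threshold, not part of this file), assuming the
 * caller propagates the returned transaction_restart error to its retry loop:
 */
static int __maybe_unused example_check_path_budget(struct btree_trans *trans,
                                                    unsigned threshold)
{
        /* threshold is a caller-chosen soft limit below BTREE_ITER_MAX */
        if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > threshold)
                return __bch2_btree_trans_too_many_iters(trans);

        return 0;
}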

static noinline void btree_path_overflow(struct btree_trans *trans)
{
        bch2_dump_trans_paths_updates(trans);
        bch_err(trans->c, "trans path overflow");
}

static noinline void btree_paths_realloc(struct btree_trans *trans)
{
        unsigned nr = trans->nr_paths * 2;

        void *p = kvzalloc(BITS_TO_LONGS(nr) * sizeof(unsigned long) +
                          sizeof(struct btree_trans_paths) +
                          nr * sizeof(struct btree_path) +
                          nr * sizeof(btree_path_idx_t) + 8 +
                          nr * sizeof(struct btree_insert_entry), GFP_KERNEL|__GFP_NOFAIL);

        unsigned long *paths_allocated = p;
        memcpy(paths_allocated, trans->paths_allocated, BITS_TO_LONGS(trans->nr_paths) * sizeof(unsigned long));
        p += BITS_TO_LONGS(nr) * sizeof(unsigned long);

        p += sizeof(struct btree_trans_paths);
        struct btree_path *paths = p;
        *trans_paths_nr(paths) = nr;
        memcpy(paths, trans->paths, trans->nr_paths * sizeof(struct btree_path));
        p += nr * sizeof(struct btree_path);

        btree_path_idx_t *sorted = p;
        memcpy(sorted, trans->sorted, trans->nr_sorted * sizeof(btree_path_idx_t));
        p += nr * sizeof(btree_path_idx_t) + 8;

        struct btree_insert_entry *updates = p;
        memcpy(updates, trans->updates, trans->nr_paths * sizeof(struct btree_insert_entry));

        unsigned long *old = trans->paths_allocated;

        rcu_assign_pointer(trans->paths_allocated,      paths_allocated);
        rcu_assign_pointer(trans->paths,                paths);
        rcu_assign_pointer(trans->sorted,               sorted);
        rcu_assign_pointer(trans->updates,              updates);

        trans->nr_paths         = nr;

        if (old != trans->_paths_allocated)
                kfree_rcu_mightsleep(old);
}
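
/*
 * Note on the layout above: a single kvzalloc() backs, in order, the
 * paths_allocated bitmap, the struct btree_trans_paths header, the paths
 * array, the sorted index array (plus 8 bytes of slack), and the updates
 * array. Each piece is published with rcu_assign_pointer(), so RCU readers
 * see either the old arrays or the fully-copied new ones, and the old
 * allocation - unless it is the transaction's embedded initial storage - is
 * freed with kfree_rcu_mightsleep() once they're done.
 */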

static inline btree_path_idx_t btree_path_alloc(struct btree_trans *trans,
                                                btree_path_idx_t pos)
{
        btree_path_idx_t idx = find_first_zero_bit(trans->paths_allocated, trans->nr_paths);

        if (unlikely(idx == trans->nr_paths)) {
                if (trans->nr_paths == BTREE_ITER_MAX) {
                        btree_path_overflow(trans);
                        return 0;
                }

                btree_paths_realloc(trans);
        }

        /*
         * Do this before marking the new path as allocated, since it won't be
         * initialized yet:
         */
        if (unlikely(idx > trans->nr_paths_max))
                bch2_trans_update_max_paths(trans);

        __set_bit(idx, trans->paths_allocated);

        struct btree_path *path = &trans->paths[idx];
        path->ref               = 0;
        path->intent_ref        = 0;
        path->nodes_locked      = 0;

        btree_path_list_add(trans, pos, idx);
        trans->paths_sorted = false;
        return idx;
}

btree_path_idx_t bch2_path_get(struct btree_trans *trans,
                             enum btree_id btree_id, struct bpos pos,
                             unsigned locks_want, unsigned level,
                             unsigned flags, unsigned long ip)
{
        struct btree_path *path;
        bool cached = flags & BTREE_ITER_cached;
        bool intent = flags & BTREE_ITER_intent;
        struct trans_for_each_path_inorder_iter iter;
        btree_path_idx_t path_pos = 0, path_idx;

        bch2_trans_verify_not_unlocked(trans);
        bch2_trans_verify_not_in_restart(trans);
        bch2_trans_verify_locks(trans);

        btree_trans_sort_paths(trans);

        trans_for_each_path_inorder(trans, path, iter) {
                if (__btree_path_cmp(path,
                                     btree_id,
                                     cached,
                                     pos,
                                     level) > 0)
                        break;

                path_pos = iter.path_idx;
        }

        if (path_pos &&
            trans->paths[path_pos].cached       == cached &&
            trans->paths[path_pos].btree_id     == btree_id &&
            trans->paths[path_pos].level        == level) {
                trace_btree_path_get(trans, trans->paths + path_pos, &pos);

                __btree_path_get(trans, trans->paths + path_pos, intent);
                path_idx = bch2_btree_path_set_pos(trans, path_pos, pos, intent, ip);
                path = trans->paths + path_idx;
        } else {
                path_idx = btree_path_alloc(trans, path_pos);
                path = trans->paths + path_idx;

                __btree_path_get(trans, path, intent);
                path->pos                       = pos;
                path->btree_id                  = btree_id;
                path->cached                    = cached;
                path->uptodate                  = BTREE_ITER_NEED_TRAVERSE;
                path->should_be_locked          = false;
                path->level                     = level;
                path->locks_want                = locks_want;
                path->nodes_locked              = 0;
                for (unsigned i = 0; i < ARRAY_SIZE(path->l); i++)
                        path->l[i].b            = ERR_PTR(-BCH_ERR_no_btree_node_init);
#ifdef TRACK_PATH_ALLOCATED
                path->ip_allocated              = ip;
#endif
                trans->paths_sorted             = false;

                trace_btree_path_alloc(trans, path);
        }

        if (!(flags & BTREE_ITER_nopreserve))
                path->preserve = true;

        if (path->intent_ref)
                locks_want = max(locks_want, level + 1);

        /*
         * If the path has locks_want greater than requested, we don't downgrade
         * it here - on transaction restart because btree node split needs to
         * upgrade locks, we might be putting/getting the iterator again.
         * Downgrading iterators only happens via bch2_trans_downgrade(), after
         * a successful transaction commit.
         */

        locks_want = min(locks_want, BTREE_MAX_DEPTH);
        if (locks_want > path->locks_want)
                bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL);

        return path_idx;
}
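
/*
 * A minimal get/put sketch (hypothetical helper, not part of this file),
 * assuming the transaction is locked and not mid-restart, as the assertions
 * at the top of bch2_path_get() require:
 */
static void __maybe_unused example_path_get_put(struct btree_trans *trans,
                                                enum btree_id btree_id,
                                                struct bpos pos)
{
        /* One lock wanted, at the leaf level, no special flags: */
        btree_path_idx_t path_idx = bch2_path_get(trans, btree_id, pos,
                                                  1, 0, 0, _THIS_IP_);

        /*
         * ... traverse and use trans->paths + path_idx here ...
         *
         * Dropping the reference doesn't necessarily free the path:
         * bch2_path_put() may keep an equivalent path around because preserve
         * was set by bch2_path_get().
         */
        bch2_path_put(trans, path_idx, false);
}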
1789                                                  1789 
1790 btree_path_idx_t bch2_path_get_unlocked_mut(s    1790 btree_path_idx_t bch2_path_get_unlocked_mut(struct btree_trans *trans,
1791                                             e    1791                                             enum btree_id btree_id,
1792                                             u    1792                                             unsigned level,
1793                                             s    1793                                             struct bpos pos)
1794 {                                                1794 {
1795         btree_path_idx_t path_idx = bch2_path    1795         btree_path_idx_t path_idx = bch2_path_get(trans, btree_id, pos, level + 1, level,
1796                              BTREE_ITER_nopre    1796                              BTREE_ITER_nopreserve|
1797                              BTREE_ITER_inten    1797                              BTREE_ITER_intent, _RET_IP_);
1798         path_idx = bch2_btree_path_make_mut(t    1798         path_idx = bch2_btree_path_make_mut(trans, path_idx, true, _RET_IP_);
1799                                                  1799 
1800         struct btree_path *path = trans->path    1800         struct btree_path *path = trans->paths + path_idx;
1801         bch2_btree_path_downgrade(trans, path    1801         bch2_btree_path_downgrade(trans, path);
1802         __bch2_btree_path_unlock(trans, path)    1802         __bch2_btree_path_unlock(trans, path);
1803         return path_idx;                         1803         return path_idx;
1804 }                                                1804 }
1805                                                  1805 
1806 struct bkey_s_c bch2_btree_path_peek_slot(str    1806 struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *u)
1807 {                                                1807 {
1808                                                  1808 
1809         struct btree_path_level *l = path_l(p    1809         struct btree_path_level *l = path_l(path);
1810         struct bkey_packed *_k;                  1810         struct bkey_packed *_k;
1811         struct bkey_s_c k;                       1811         struct bkey_s_c k;
1812                                                  1812 
1813         if (unlikely(!l->b))                     1813         if (unlikely(!l->b))
1814                 return bkey_s_c_null;            1814                 return bkey_s_c_null;
1815                                                  1815 
1816         EBUG_ON(path->uptodate != BTREE_ITER_    1816         EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
1817         EBUG_ON(!btree_node_locked(path, path    1817         EBUG_ON(!btree_node_locked(path, path->level));
1818                                                  1818 
1819         if (!path->cached) {                     1819         if (!path->cached) {
1820                 _k = bch2_btree_node_iter_pee    1820                 _k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
1821                 k = _k ? bkey_disassemble(l->    1821                 k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
1822                                                  1822 
1823                 EBUG_ON(k.k && bkey_deleted(k    1823                 EBUG_ON(k.k && bkey_deleted(k.k) && bpos_eq(k.k->p, path->pos));
1824                                                  1824 
1825                 if (!k.k || !bpos_eq(path->po    1825                 if (!k.k || !bpos_eq(path->pos, k.k->p))
1826                         goto hole;               1826                         goto hole;
1827         } else {                                 1827         } else {
1828                 struct bkey_cached *ck = (voi    1828                 struct bkey_cached *ck = (void *) path->l[0].b;
1829                 if (!ck)                         1829                 if (!ck)
1830                         return bkey_s_c_null;    1830                         return bkey_s_c_null;
1831                                                  1831 
1832                 EBUG_ON(path->btree_id != ck-    1832                 EBUG_ON(path->btree_id != ck->key.btree_id ||
1833                         !bkey_eq(path->pos, c    1833                         !bkey_eq(path->pos, ck->key.pos));
1834                                                  1834 
1835                 *u = ck->k->k;                   1835                 *u = ck->k->k;
1836                 k = bkey_i_to_s_c(ck->k);        1836                 k = bkey_i_to_s_c(ck->k);
1837         }                                        1837         }
1838                                                  1838 
1839         return k;                                1839         return k;
1840 hole:                                            1840 hole:
1841         bkey_init(u);                            1841         bkey_init(u);
1842         u->p = path->pos;                        1842         u->p = path->pos;
1843         return (struct bkey_s_c) { u, NULL };    1843         return (struct bkey_s_c) { u, NULL };
1844 }                                                1844 }
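
/*
 * Illustrative sketch, not part of the original file: how a caller might
 * interpret bch2_btree_path_peek_slot()'s return convention - k.k == NULL
 * means no node or no cached key was available, while a hole comes back as a
 * synthesized deleted key at path->pos with a NULL value pointer.
 * example_classify_slot() is a hypothetical helper introduced only for
 * illustration.
 */
#if 0
static const char *example_classify_slot(struct bkey_s_c k)
{
	if (!k.k)
		return "nothing: no node, or key cache entry missing";
	if (bkey_deleted(k.k))
		return "hole: synthesized deleted key at path->pos";
	return "real key";
}
#endif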

void bch2_set_btree_iter_dontneed(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;

	if (!iter->path || trans->restarted)
		return;

	struct btree_path *path = btree_iter_path(trans, iter);
	path->preserve		= false;
	if (path->ref == 1)
		path->should_be_locked	= false;
}

/* Btree iterators: */

int __must_check
__bch2_btree_iter_traverse(struct btree_iter *iter)
{
	return bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
}

int __must_check
bch2_btree_iter_traverse(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;
	int ret;

	bch2_trans_verify_not_unlocked(trans);

	iter->path = bch2_btree_path_set_pos(trans, iter->path,
					btree_iter_search_key(iter),
					iter->flags & BTREE_ITER_intent,
					btree_iter_ip_allocated(iter));

	ret = bch2_btree_path_traverse(iter->trans, iter->path, iter->flags);
	if (ret)
		return ret;

	struct btree_path *path = btree_iter_path(trans, iter);
	if (btree_path_node(path, path->level))
		btree_path_set_should_be_locked(trans, path);
	return 0;
}
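
/*
 * Illustrative sketch, not part of the original file: explicitly traversing
 * an iterator before reading the slot at its position. It assumes the
 * iterator helpers declared in btree_iter.h (bch2_trans_iter_init(),
 * bch2_btree_iter_peek_slot(), bch2_trans_iter_exit()) with the signatures
 * used by this version of the code; example_read_one_key() is hypothetical.
 */
#if 0
static int example_read_one_key(struct btree_trans *trans,
				enum btree_id btree, struct bpos pos)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_iter_init(trans, &iter, btree, pos, 0);

	/* position and lock the underlying btree_path, then peek the slot: */
	ret = bch2_btree_iter_traverse(&iter) ?:
		bkey_err(k = bch2_btree_iter_peek_slot(&iter));

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
#endif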

/* Iterate across nodes (leaf and interior nodes) */

struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;
	struct btree *b = NULL;
	int ret;

	EBUG_ON(trans->paths[iter->path].cached);
	bch2_btree_iter_verify(iter);

	ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
	if (ret)
		goto err;

	struct btree_path *path = btree_iter_path(trans, iter);
	b = btree_path_node(path, path->level);
	if (!b)
		goto out;

	BUG_ON(bpos_lt(b->key.k.p, iter->pos));

	bkey_init(&iter->k);
	iter->k.p = iter->pos = b->key.k.p;

	iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
					iter->flags & BTREE_ITER_intent,
					btree_iter_ip_allocated(iter));
	btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter));
out:
	bch2_btree_iter_verify_entry_exit(iter);
	bch2_btree_iter_verify(iter);

	return b;
err:
	b = ERR_PTR(ret);
	goto out;
}

/* Only kept for -tools */
struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter)
{
	struct btree *b;

	while (b = bch2_btree_iter_peek_node(iter),
	       bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
		bch2_trans_begin(iter->trans);

	return b;
}
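
/*
 * Illustrative sketch, not part of the original file: the helper above wraps
 * the usual transaction-restart idiom - when an operation fails with a
 * BCH_ERR_transaction_restart error, the caller restarts the transaction with
 * bch2_trans_begin() and retries. A hedged sketch of that pattern, with
 * example_op() standing in for any hypothetical transactional operation:
 */
#if 0
static int example_op(struct btree_trans *);	/* hypothetical operation */

static int example_retry_on_restart(struct btree_trans *trans)
{
	int ret;

	do {
		bch2_trans_begin(trans);
		ret = example_op(trans);
	} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));

	return ret;
}
#endif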

struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
{
	struct btree_trans *trans = iter->trans;
	struct btree *b = NULL;
	int ret;

	EBUG_ON(trans->paths[iter->path].cached);
	bch2_trans_verify_not_in_restart(trans);
	bch2_btree_iter_verify(iter);

	ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
	if (ret)
		goto err;

	struct btree_path *path = btree_iter_path(trans, iter);

	/* already at end? */
	if (!btree_path_node(path, path->level))
		return NULL;

	/* got to end? */
	if (!btree_path_node(path, path->level + 1)) {
		btree_path_set_level_up(trans, path);
		return NULL;
	}

	if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
		__bch2_btree_path_unlock(trans, path);
		path->l[path->level].b		= ERR_PTR(-BCH_ERR_no_btree_node_relock);
		path->l[path->level + 1].b	= ERR_PTR(-BCH_ERR_no_btree_node_relock);
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
		trace_and_count(trans->c, trans_restart_relock_next_node, trans, _THIS_IP_, path);
		ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
		goto err;
	}

	b = btree_path_node(path, path->level + 1);

	if (bpos_eq(iter->pos, b->key.k.p)) {
		__btree_path_set_level_up(trans, path, path->level++);
	} else {
		if (btree_lock_want(path, path->level + 1) == BTREE_NODE_UNLOCKED)
			btree_node_unlock(trans, path, path->level + 1);

		/*
		 * Haven't gotten to the end of the parent node: go back down to
		 * the next child node
		 */
		iter->path = bch2_btree_path_set_pos(trans, iter->path,
					bpos_successor(iter->pos),
					iter->flags & BTREE_ITER_intent,
					btree_iter_ip_allocated(iter));

		path = btree_iter_path(trans, iter);
		btree_path_set_level_down(trans, path, iter->min_depth);

		ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
		if (ret)
			goto err;

		path = btree_iter_path(trans, iter);
		b = path->l[path->level].b;
	}

	bkey_init(&iter->k);
	iter->k.p = iter->pos = b->key.k.p;

	iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
					iter->flags & BTREE_ITER_intent,
					btree_iter_ip_allocated(iter));
	btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter));
	EBUG_ON(btree_iter_path(trans, iter)->uptodate);
out:
	bch2_btree_iter_verify_entry_exit(iter);
	bch2_btree_iter_verify(iter);

	return b;
err:
	b = ERR_PTR(ret);
	goto out;
}
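
/*
 * Illustrative sketch, not part of the original file: walking btree nodes
 * with the two helpers above. It assumes @iter was already set up as a node
 * iterator (e.g. via bch2_trans_node_iter_init()); example_visit_nodes() and
 * its callback are hypothetical, and transaction restarts are simply
 * propagated to the caller.
 */
#if 0
static int example_visit_nodes(struct btree_iter *iter,
			       void (*visit)(struct btree *))
{
	struct btree *b;
	int ret;

	for (b = bch2_btree_iter_peek_node(iter);
	     b;
	     b = bch2_btree_iter_next_node(iter)) {
		ret = PTR_ERR_OR_ZERO(b);
		if (ret)
			return ret;	/* may be a transaction restart */

		visit(b);
	}

	return 0;
}
#endif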

/* Iterate across keys (in leaf nodes only) */

inline bool bch2_btree_iter_advance(struct btree_iter *iter)
{
	struct bpos pos = iter->k.p;
	bool ret = !(iter->flags & BTREE_ITER_all_snapshots
		     ? bpos_eq(pos, SPOS_MAX)
		     : bkey_eq(pos, SPOS_MAX));

	if (ret && !(iter->flags & BTREE_ITER_is_extents))
		pos = bkey_successor(iter, pos);
	bch2_btree_iter_set_pos(iter, pos);
	return ret;
}

inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
{
	struct bpos pos = bkey_start_pos(&iter->k);
	bool ret = !(iter->flags & BTREE_ITER_all_snapshots
		     ? bpos_eq(pos, POS_MIN)
		     : bkey_eq(pos, POS_MIN));

	if (ret && !(iter->flags & BTREE_ITER_is_extents))
		pos = bkey_predecessor(iter, pos);
	bch2_btree_iter_set_pos(iter, pos);
	return ret;
}
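
/*
 * Illustrative sketch, not part of the original file: advance/rewind only
 * move the iterator's position; they don't read anything themselves. A
 * minimal sketch of the manual scan loop they are meant for, assuming @iter
 * is an already-initialized key iterator; example_scan() is hypothetical and
 * leaves transaction-restart handling to the caller.
 */
#if 0
static int example_scan(struct btree_iter *iter, struct bpos end)
{
	struct bkey_s_c k;
	int ret;

	while ((k = bch2_btree_iter_peek_upto(iter, end)).k) {
		ret = bkey_err(k);
		if (ret)
			return ret;

		/* ... process k ... */

		if (!bch2_btree_iter_advance(iter))
			break;
	}

	return 0;
}
#endif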

static noinline
void bch2_btree_trans_peek_prev_updates(struct btree_trans *trans, struct btree_iter *iter,
					struct bkey_s_c *k)
{
	struct bpos end = path_l(btree_iter_path(trans, iter))->b->data->min_key;

	trans_for_each_update(trans, i)
		if (!i->key_cache_already_flushed &&
		    i->btree_id == iter->btree_id &&
		    bpos_le(i->k->k.p, iter->pos) &&
		    bpos_ge(i->k->k.p, k->k ? k->k->p : end)) {
			iter->k = i->k->k;
			*k = bkey_i_to_s_c(i->k);
		}
}

static noinline
void bch2_btree_trans_peek_updates(struct btree_trans *trans, struct btree_iter *iter,
				   struct bkey_s_c *k)
{
	struct btree_path *path = btree_iter_path(trans, iter);
	struct bpos end = path_l(path)->b->key.k.p;

	trans_for_each_update(trans, i)
		if (!i->key_cache_already_flushed &&
		    i->btree_id == iter->btree_id &&
		    bpos_ge(i->k->k.p, path->pos) &&
		    bpos_le(i->k->k.p, k->k ? k->k->p : end)) {
			iter->k = i->k->k;
			*k = bkey_i_to_s_c(i->k);
		}
}

static noinline
void bch2_btree_trans_peek_slot_updates(struct btree_trans *trans, struct btree_iter *iter,
					struct bkey_s_c *k)
{
	trans_for_each_update(trans, i)
		if (!i->key_cache_already_flushed &&
		    i->btree_id == iter->btree_id &&
		    bpos_eq(i->k->k.p, iter->pos)) {
			iter->k = i->k->k;
			*k = bkey_i_to_s_c(i->k);
		}
}
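
/*
 * Illustrative sketch, not part of the original file: the helpers above
 * overlay updates queued in the current transaction (but not yet committed)
 * on top of what a peek reads from the btree. A hedged sketch of the intended
 * effect, assuming bch2_trans_update() and the BTREE_ITER_with_updates flag
 * behave as they are used elsewhere in this file, and that @iter is
 * positioned at new_key->k.p; example_read_own_write() is hypothetical.
 */
#if 0
static int example_read_own_write(struct btree_trans *trans,
				  struct btree_iter *iter,
				  struct bkey_i *new_key)
{
	struct bkey_s_c k;

	/* queue an update in this transaction: */
	int ret = bch2_trans_update(trans, iter, new_key, 0);
	if (ret)
		return ret;

	/*
	 * with BTREE_ITER_with_updates set on @iter, a subsequent peek at
	 * new_key->k.p should see new_key even before the commit:
	 */
	k = bch2_btree_iter_peek_slot(iter);
	return bkey_err(k);
}
#endif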

static struct bkey_i *bch2_btree_journal_peek(struct btree_trans *trans,
					      struct btree_iter *iter,
					      struct bpos end_pos)
{
	struct btree_path *path = btree_iter_path(trans, iter);

	return bch2_journal_keys_peek_upto(trans->c, iter->btree_id,
					   path->level,
					   path->pos,
					   end_pos,
					   &iter->journal_idx);
}

static noinline
struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
					      struct btree_iter *iter)
{
	struct btree_path *path = btree_iter_path(trans, iter);
	struct bkey_i *k = bch2_btree_journal_peek(trans, iter, path->pos);

	if (k) {
		iter->k = k->k;
		return bkey_i_to_s_c(k);
	} else {
		return bkey_s_c_null;
	}
}

static noinline
struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
					 struct btree_iter *iter,
					 struct bkey_s_c k)
{
	struct btree_path *path = btree_iter_path(trans, iter);
	struct bkey_i *next_journal =
		bch2_btree_journal_peek(trans, iter,
				k.k ? k.k->p : path_l(path)->b->key.k.p);

	if (next_journal) {
		iter->k = next_journal->k;
		k = bkey_i_to_s_c(next_journal);
	}

	return k;
}
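
/*
 * Illustrative sketch, not part of the original file: during early startup,
 * before journal replay has finished, some keys exist only in the journal, so
 * iterators flagged BTREE_ITER_with_journal merge in journal keys.
 * btree_trans_peek_journal() above only searches the journal up to the btree
 * key's position, so any journal key it finds takes precedence. A distilled
 * restatement of that choice (example_pick() is hypothetical):
 */
#if 0
static struct bkey_s_c example_pick(struct bkey_s_c btree_k, struct bkey_i *journal_k)
{
	/* journal key, if any, was found at or before btree_k, so it wins: */
	return journal_k ? bkey_i_to_s_c(journal_k) : btree_k;
}
#endif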

/*
 * Checks the btree key cache for a key at iter->pos and returns it if
 * present, or bkey_s_c_null if not:
 */
static noinline
struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
{
	struct btree_trans *trans = iter->trans;
	struct bch_fs *c = trans->c;
	struct bkey u;
	struct bkey_s_c k;
	int ret;

	bch2_trans_verify_not_in_restart(trans);
	bch2_trans_verify_not_unlocked(trans);

	if ((iter->flags & BTREE_ITER_key_cache_fill) &&
	    bpos_eq(iter->pos, pos))
		return bkey_s_c_null;

	if (!bch2_btree_key_cache_find(c, iter->btree_id, pos))
		return bkey_s_c_null;

	if (!iter->key_cache_path)
		iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos,
						     iter->flags & BTREE_ITER_intent, 0,
						     iter->flags|BTREE_ITER_cached|
						     BTREE_ITER_cached_nofill,
						     _THIS_IP_);

	iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos,
					iter->flags & BTREE_ITER_intent,
					btree_iter_ip_allocated(iter));

	ret =   bch2_btree_path_traverse(trans, iter->key_cache_path,
					 iter->flags|BTREE_ITER_cached) ?:
		bch2_btree_path_relock(trans, btree_iter_path(trans, iter), _THIS_IP_);
	if (unlikely(ret))
		return bkey_s_c_err(ret);

	btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path);

	k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u);
	if (k.k && !bkey_err(k)) {
		iter->k = u;
		k.k = &iter->k;
	}
	return k;
}
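
/*
 * Illustrative sketch, not part of the original file: with
 * BTREE_ITER_with_key_cache, a key sitting in the key cache must override
 * whatever the underlying btree holds at the same position.
 * __bch2_btree_iter_peek() below uses the helper above exactly that way; a
 * distilled restatement of the precedence (example_overlay() is hypothetical):
 */
#if 0
static struct bkey_s_c example_overlay(struct bkey_s_c btree_k, struct bkey_s_c cache_k)
{
	/* a key cache hit at the same position takes precedence: */
	return cache_k.k ? cache_k : btree_k;
}
#endif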

static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
{
	struct btree_trans *trans = iter->trans;
	struct bkey_s_c k, k2;
	int ret;

	EBUG_ON(btree_iter_path(trans, iter)->cached);
	bch2_btree_iter_verify(iter);

	while (1) {
		struct btree_path_level *l;

		iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
					iter->flags & BTREE_ITER_intent,
					btree_iter_ip_allocated(iter));

		ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
		if (unlikely(ret)) {
			/* ensure that iter->k is consistent with iter->pos: */
			bch2_btree_iter_set_pos(iter, iter->pos);
			k = bkey_s_c_err(ret);
			goto out;
		}

		struct btree_path *path = btree_iter_path(trans, iter);
		l = path_l(path);

		if (unlikely(!l->b)) {
			/* No btree nodes at requested level: */
			bch2_btree_iter_set_pos(iter, SPOS_MAX);
			k = bkey_s_c_null;
			goto out;
		}

		btree_path_set_should_be_locked(trans, path);

		k = btree_path_level_peek_all(trans->c, l, &iter->k);

		if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
		    k.k &&
		    (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
			k = k2;
			ret = bkey_err(k);
			if (ret) {
				bch2_btree_iter_set_pos(iter, iter->pos);
				goto out;
			}
		}

		if (unlikely(iter->flags & BTREE_ITER_with_journal))
			k = btree_trans_peek_journal(trans, iter, k);

		if (unlikely((iter->flags & BTREE_ITER_with_updates) &&
			     trans->nr_updates))
			bch2_btree_trans_peek_updates(trans, iter, &k);

		if (k.k && bkey_deleted(k.k)) {
			/*
			 * If we've got a whiteout, and it's after the search
			 * key, advance the search key to the whiteout instead
			 * of just after the whiteout - it might be a btree
			 * whiteout, with a real key at the same position, since
			 * in the btree, deleted keys sort before non-deleted keys.
			 */
			search_key = !bpos_eq(search_key, k.k->p)
				? k.k->p
				: bpos_successor(k.k->p);
			continue;
		}

		if (likely(k.k)) {
			break;
		} else if (likely(!bpos_eq(l->b->key.k.p, SPOS_MAX))) {
			/* Advance to next leaf node: */
			search_key = bpos_successor(l->b->key.k.p);
		} else {
			/* End of btree: */
			bch2_btree_iter_set_pos(iter, SPOS_MAX);
			k = bkey_s_c_null;
			goto out;
		}
	}
out:
	bch2_btree_iter_verify(iter);

	return k;
}
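
/*
 * Illustrative sketch, not part of the original file: a distilled restatement
 * of the whiteout handling above. Because deleted keys sort before
 * non-deleted keys at the same position, the re-search must land on the
 * whiteout's position first rather than skipping past it
 * (example_next_search_key() is hypothetical):
 */
#if 0
static struct bpos example_next_search_key(struct bpos search_key, struct bpos whiteout)
{
	return !bpos_eq(search_key, whiteout)
		? whiteout			/* re-search at the whiteout itself */
		: bpos_successor(whiteout);	/* already there: move past it */
}
#endif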

/**
 * bch2_btree_iter_peek_upto() - returns first key greater than or equal to
 * iterator's current position
 * @iter:	iterator to peek from
 * @end:	search limit: returns keys less than or equal to @end
 *
 * Returns:	key if found, or an error extractable with bkey_err().
 */
struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
{
	struct btree_trans *trans = iter->trans;
	struct bpos search_key = btree_iter_search_key(iter);
	struct bkey_s_c k;
	struct bpos iter_pos;
	int ret;

	bch2_trans_verify_not_unlocked(trans);
	EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX));

	if (iter->update_path) {
		bch2_path_put_nokeep(trans, iter->update_path,
				     iter->flags & BTREE_ITER_intent);
		iter->update_path = 0;
	}

	bch2_btree_iter_verify_entry_exit(iter);

	while (1) {
		k = __bch2_btree_iter_peek(iter, search_key);
		if (unlikely(!k.k))
			goto end;
		if (unlikely(bkey_err(k)))
			goto out_no_locked;

		/*
		 * We need to check against @end before FILTER_SNAPSHOTS because
		 * if we get to a different inode than the one requested, we
		 * might be seeing keys for a different snapshot tree that will
		 * all be filtered out.
		 *
		 * But we can't do the full check here, because bkey_start_pos()
		 * isn't monotonically increasing before FILTER_SNAPSHOTS, and
		 * that's what we check against in extents mode:
		 */
		if (unlikely(!(iter->flags & BTREE_ITER_is_extents)
			     ? bkey_gt(k.k->p, end)
			     : k.k->p.inode > end.inode))
			goto end;

		if (iter->update_path &&
		    !bkey_eq(trans->paths[iter->update_path].pos, k.k->p)) {
			bch2_path_put_nokeep(trans, iter->update_path,
					     iter->flags & BTREE_ITER_intent);
			iter->update_path = 0;
		}

		if ((iter->flags & BTREE_ITER_filter_snapshots) &&
		    (iter->flags & BTREE_ITER_intent) &&
		    !(iter->flags & BTREE_ITER_is_extents) &&
		    !iter->update_path) {
			struct bpos pos = k.k->p;

			if (pos.snapshot < iter->snapshot) {
				search_key = bpos_successor(k.k->p);
				continue;
			}

			pos.snapshot = iter->snapshot;

			/*
			 * advance, same as on exit for iter->path, but only up
			 * to snapshot
			 */
			__btree_path_get(trans, trans->paths + iter->path, iter->flags & BTREE_ITER_intent);
			iter->update_path = iter->path;

			iter->update_path = bch2_btree_path_set_pos(trans,
						iter->update_path, pos,
						iter->flags & BTREE_ITER_intent,
						_THIS_IP_);
			ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags);
			if (unlikely(ret)) {
				k = bkey_s_c_err(ret);
				goto out_no_locked;
			}
		}

		/*
		 * We can never have a key in a leaf node at POS_MAX, so
		 * we don't have to check these successor() calls:
		 */
		if ((iter->flags & BTREE_ITER_filter_snapshots) &&
		    !bch2_snapshot_is_ancestor(trans->c,
					       iter->snapshot,
					       k.k->p.snapshot)) {
			search_key = bpos_successor(k.k->p);
			continue;
		}

		if (bkey_whiteout(k.k) &&
		    !(iter->flags & BTREE_ITER_all_snapshots)) {
			search_key = bkey_successor(iter, k.k->p);
			continue;
		}

		/*
		 * iter->pos should be monotonically increasing, and always be
		 * equal to the key we just returned - except extents can
		 * straddle iter->pos:
		 */
		if (!(iter->flags & BTREE_ITER_is_extents))
			iter_pos = k.k->p;
		else
			iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));

		if (unlikely(iter->flags & BTREE_ITER_all_snapshots	? bpos_gt(iter_pos, end) :
			     iter->flags & BTREE_ITER_is_extents	? bkey_ge(iter_pos, end) :
									  bkey_gt(iter_pos, end)))
			goto end;

		break;
	}

	iter->pos = iter_pos;

	iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
				iter->flags & BTREE_ITER_intent,
				btree_iter_ip_allocated(iter));

	btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter));
out_no_locked:
	if (iter->update_path) {
		ret = bch2_btree_path_relock(trans, trans->paths + iter->update_path, _THIS_IP_);
		if (unlikely(ret))
			k = bkey_s_c_err(ret);
		else
			btree_path_set_should_be_locked(trans, trans->paths + iter->update_path);
	}

	if (!(iter->flags & BTREE_ITER_all_snapshots))
		iter->pos.snapshot = iter->snapshot;

	ret = bch2_btree_iter_verify_ret(iter, k);
	if (unlikely(ret)) {
		bch2_btree_iter_set_pos(iter, iter->pos);
		k = bkey_s_c_err(ret);
	}

	bch2_btree_iter_verify_entry_exit(iter);

	return k;
end:
	bch2_btree_iter_set_pos(iter, end);
	k = bkey_s_c_null;
	goto out_no_locked;
}
2438                                                  2438 
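/*
 * Editor's note (illustrative sketch, not part of the upstream file): the
 * peek functions above are normally consumed with a peek/advance loop that
 * stops on a NULL key (nothing left in the range) or an error.  The helper
 * name and the key-counting body below are invented for illustration; a
 * real caller would also handle transaction restarts (see the retry idiom
 * further down).
 */
static int __maybe_unused example_count_keys_upto(struct btree_iter *iter,
                                                  struct bpos end, u64 *nr)
{
        struct bkey_s_c k;
        int ret = 0;

        *nr = 0;
        while (1) {
                k = bch2_btree_iter_peek_upto(iter, end);
                ret = bkey_err(k);
                if (ret || !k.k)
                        break;

                (*nr)++;                        /* "use" the key */
                bch2_btree_iter_advance(iter);  /* step past k.k->p */
        }
        return ret;
}
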
/**
 * bch2_btree_iter_next() - returns first key greater than iterator's current
 * position
 * @iter:       iterator to peek from
 *
 * Returns:     key if found, or an error extractable with bkey_err().
 */
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
{
        if (!bch2_btree_iter_advance(iter))
                return bkey_s_c_null;

        return bch2_btree_iter_peek(iter);
}

/**
 * bch2_btree_iter_peek_prev() - returns first key less than or equal to
 * iterator's current position
 * @iter:       iterator to peek from
 *
 * Returns:     key if found, or an error extractable with bkey_err().
 */
struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
{
        struct btree_trans *trans = iter->trans;
        struct bpos search_key = iter->pos;
        struct bkey_s_c k;
        struct bkey saved_k;
        const struct bch_val *saved_v;
        btree_path_idx_t saved_path = 0;
        int ret;

        bch2_trans_verify_not_unlocked(trans);
        EBUG_ON(btree_iter_path(trans, iter)->cached ||
                btree_iter_path(trans, iter)->level);

        if (iter->flags & BTREE_ITER_with_journal)
                return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported);

        bch2_btree_iter_verify(iter);
        bch2_btree_iter_verify_entry_exit(iter);

        if (iter->flags & BTREE_ITER_filter_snapshots)
                search_key.snapshot = U32_MAX;

        while (1) {
                iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
                                                iter->flags & BTREE_ITER_intent,
                                                btree_iter_ip_allocated(iter));

                ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
                if (unlikely(ret)) {
                        /* ensure that iter->k is consistent with iter->pos: */
                        bch2_btree_iter_set_pos(iter, iter->pos);
                        k = bkey_s_c_err(ret);
                        goto out_no_locked;
                }

                struct btree_path *path = btree_iter_path(trans, iter);

                k = btree_path_level_peek(trans, path, &path->l[0], &iter->k);
                if (!k.k ||
                    ((iter->flags & BTREE_ITER_is_extents)
                     ? bpos_ge(bkey_start_pos(k.k), search_key)
                     : bpos_gt(k.k->p, search_key)))
                        k = btree_path_level_prev(trans, path, &path->l[0], &iter->k);

                if (unlikely((iter->flags & BTREE_ITER_with_updates) &&
                             trans->nr_updates))
                        bch2_btree_trans_peek_prev_updates(trans, iter, &k);

                if (likely(k.k)) {
                        if (iter->flags & BTREE_ITER_filter_snapshots) {
                                if (k.k->p.snapshot == iter->snapshot)
                                        goto got_key;

                                /*
                                 * If we have a saved candidate, and we're no
                                 * longer at the same _key_ (not pos), return
                                 * that candidate
                                 */
                                if (saved_path && !bkey_eq(k.k->p, saved_k.p)) {
                                        bch2_path_put_nokeep(trans, iter->path,
                                                      iter->flags & BTREE_ITER_intent);
                                        iter->path = saved_path;
                                        saved_path = 0;
                                        iter->k = saved_k;
                                        k.v     = saved_v;
                                        goto got_key;
                                }

                                if (bch2_snapshot_is_ancestor(trans->c,
                                                              iter->snapshot,
                                                              k.k->p.snapshot)) {
                                        if (saved_path)
                                                bch2_path_put_nokeep(trans, saved_path,
                                                      iter->flags & BTREE_ITER_intent);
                                        saved_path = btree_path_clone(trans, iter->path,
                                                                iter->flags & BTREE_ITER_intent,
                                                                _THIS_IP_);
                                        path = btree_iter_path(trans, iter);
                                        trace_btree_path_save_pos(trans, path, trans->paths + saved_path);
                                        saved_k = *k.k;
                                        saved_v = k.v;
                                }

                                search_key = bpos_predecessor(k.k->p);
                                continue;
                        }
got_key:
                        if (bkey_whiteout(k.k) &&
                            !(iter->flags & BTREE_ITER_all_snapshots)) {
                                search_key = bkey_predecessor(iter, k.k->p);
                                if (iter->flags & BTREE_ITER_filter_snapshots)
                                        search_key.snapshot = U32_MAX;
                                continue;
                        }

                        btree_path_set_should_be_locked(trans, path);
                        break;
                } else if (likely(!bpos_eq(path->l[0].b->data->min_key, POS_MIN))) {
                        /* Advance to previous leaf node: */
                        search_key = bpos_predecessor(path->l[0].b->data->min_key);
                } else {
                        /* Start of btree: */
                        bch2_btree_iter_set_pos(iter, POS_MIN);
                        k = bkey_s_c_null;
                        goto out_no_locked;
                }
        }

        EBUG_ON(bkey_gt(bkey_start_pos(k.k), iter->pos));

        /* Extents can straddle iter->pos: */
        if (bkey_lt(k.k->p, iter->pos))
                iter->pos = k.k->p;

        if (iter->flags & BTREE_ITER_filter_snapshots)
                iter->pos.snapshot = iter->snapshot;
out_no_locked:
        if (saved_path)
                bch2_path_put_nokeep(trans, saved_path, iter->flags & BTREE_ITER_intent);

        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);

        return k;
}

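/*
 * Editor's note (illustrative sketch, not part of the upstream file):
 * walking a btree backwards pairs bch2_btree_iter_peek_prev() with
 * bch2_btree_iter_prev(), terminating on a NULL key (start of btree) or an
 * error.  The helper name and key-counting body are invented; a real caller
 * would also handle transaction restarts.
 */
static int __maybe_unused example_walk_backwards(struct btree_iter *iter, u64 *nr)
{
        struct bkey_s_c k;
        int ret = 0;

        *nr = 0;
        for (k = bch2_btree_iter_peek_prev(iter);
             k.k && !(ret = bkey_err(k));
             k = bch2_btree_iter_prev(iter))
                (*nr)++;                        /* "use" each key */

        return ret;
}
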
/**
 * bch2_btree_iter_prev() - returns first key less than iterator's current
 * position
 * @iter:       iterator to peek from
 *
 * Returns:     key if found, or an error extractable with bkey_err().
 */
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
{
        if (!bch2_btree_iter_rewind(iter))
                return bkey_s_c_null;

        return bch2_btree_iter_peek_prev(iter);
}

struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
{
        struct btree_trans *trans = iter->trans;
        struct bpos search_key;
        struct bkey_s_c k;
        int ret;

        bch2_trans_verify_not_unlocked(trans);
        bch2_btree_iter_verify(iter);
        bch2_btree_iter_verify_entry_exit(iter);
        EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache));

        /* extents can't span inode numbers: */
        if ((iter->flags & BTREE_ITER_is_extents) &&
            unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
                if (iter->pos.inode == KEY_INODE_MAX)
                        return bkey_s_c_null;

                bch2_btree_iter_set_pos(iter, bpos_nosnap_successor(iter->pos));
        }

        search_key = btree_iter_search_key(iter);
        iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
                                        iter->flags & BTREE_ITER_intent,
                                        btree_iter_ip_allocated(iter));

        ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
        if (unlikely(ret)) {
                k = bkey_s_c_err(ret);
                goto out_no_locked;
        }

        if ((iter->flags & BTREE_ITER_cached) ||
            !(iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots))) {
                k = bkey_s_c_null;

                if (unlikely((iter->flags & BTREE_ITER_with_updates) &&
                             trans->nr_updates)) {
                        bch2_btree_trans_peek_slot_updates(trans, iter, &k);
                        if (k.k)
                                goto out;
                }

                if (unlikely(iter->flags & BTREE_ITER_with_journal) &&
                    (k = btree_trans_peek_slot_journal(trans, iter)).k)
                        goto out;

                if (unlikely(iter->flags & BTREE_ITER_with_key_cache) &&
                    (k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
                        if (!bkey_err(k))
                                iter->k = *k.k;
                        /* We're not returning a key from iter->path: */
                        goto out_no_locked;
                }

                k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k);
                if (unlikely(!k.k))
                        goto out_no_locked;
        } else {
                struct bpos next;
                struct bpos end = iter->pos;

                if (iter->flags & BTREE_ITER_is_extents)
                        end.offset = U64_MAX;

                EBUG_ON(btree_iter_path(trans, iter)->level);

                if (iter->flags & BTREE_ITER_intent) {
                        struct btree_iter iter2;

                        bch2_trans_copy_iter(&iter2, iter);
                        k = bch2_btree_iter_peek_upto(&iter2, end);

                        if (k.k && !bkey_err(k)) {
                                swap(iter->key_cache_path, iter2.key_cache_path);
                                iter->k = iter2.k;
                                k.k = &iter->k;
                        }
                        bch2_trans_iter_exit(trans, &iter2);
                } else {
                        struct bpos pos = iter->pos;

                        k = bch2_btree_iter_peek_upto(iter, end);
                        if (unlikely(bkey_err(k)))
                                bch2_btree_iter_set_pos(iter, pos);
                        else
                                iter->pos = pos;
                }

                if (unlikely(bkey_err(k)))
                        goto out_no_locked;

                next = k.k ? bkey_start_pos(k.k) : POS_MAX;

                if (bkey_lt(iter->pos, next)) {
                        bkey_init(&iter->k);
                        iter->k.p = iter->pos;

                        if (iter->flags & BTREE_ITER_is_extents) {
                                bch2_key_resize(&iter->k,
                                                min_t(u64, KEY_SIZE_MAX,
                                                      (next.inode == iter->pos.inode
                                                       ? next.offset
                                                       : KEY_OFFSET_MAX) -
                                                      iter->pos.offset));
                                EBUG_ON(!iter->k.size);
                        }

                        k = (struct bkey_s_c) { &iter->k, NULL };
                }
        }
out:
        btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter));
out_no_locked:
        bch2_btree_iter_verify_entry_exit(iter);
        bch2_btree_iter_verify(iter);
        ret = bch2_btree_iter_verify_ret(iter, k);
        if (unlikely(ret))
                return bkey_s_c_err(ret);

        return k;
}

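/*
 * Editor's note (illustrative sketch, not part of the upstream file):
 * bch2_btree_iter_peek_slot() is the point-lookup primitive - unlike peek(),
 * it returns a key for the iterator's exact position even when nothing is
 * stored there, synthesizing a deleted key (sized, for extents) to describe
 * the hole.  The helper name, btree/pos parameters and "copy the bkey" body
 * below are invented; real callers typically use an inline init wrapper from
 * btree_iter.h rather than the outlined init used here.
 */
static int __maybe_unused example_lookup_slot(struct btree_trans *trans,
                                              enum btree_id btree, struct bpos pos,
                                              struct bkey *result)
{
        struct btree_iter iter;
        struct bkey_s_c k;
        int ret;

        bch2_trans_iter_init_outlined(trans, &iter, btree, pos, 0);

        k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (!ret)
                *result = *k.k;         /* copy the (possibly synthesized) key */

        bch2_trans_iter_exit(trans, &iter);
        return ret;
}
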
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
{
        if (!bch2_btree_iter_advance(iter))
                return bkey_s_c_null;

        return bch2_btree_iter_peek_slot(iter);
}

struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *iter)
{
        if (!bch2_btree_iter_rewind(iter))
                return bkey_s_c_null;

        return bch2_btree_iter_peek_slot(iter);
}

/* Obsolete, but still used by rust wrapper in -tools */
struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *iter)
{
        struct bkey_s_c k;

        while (btree_trans_too_many_iters(iter->trans) ||
               (k = bch2_btree_iter_peek_type(iter, iter->flags),
                bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
                bch2_trans_begin(iter->trans);

        return k;
}

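/*
 * Editor's note (illustrative sketch, not part of the upstream file): the
 * loop above is the standard restart idiom - any error matching
 * BCH_ERR_transaction_restart means the operation must be retried from the
 * top after bch2_trans_begin().  A hand-rolled version of that pattern,
 * with an invented helper name and plain peek() instead of peek_type():
 */
static struct bkey_s_c __maybe_unused example_peek_with_restart(struct btree_iter *iter)
{
        struct bkey_s_c k;

        do {
                bch2_trans_begin(iter->trans);
                k = bch2_btree_iter_peek(iter);
        } while (bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart));

        return k;
}
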
/* new transactional stuff: */

#ifdef CONFIG_BCACHEFS_DEBUG
static void btree_trans_verify_sorted_refs(struct btree_trans *trans)
{
        struct btree_path *path;
        unsigned i;

        BUG_ON(trans->nr_sorted != bitmap_weight(trans->paths_allocated, trans->nr_paths) - 1);

        trans_for_each_path(trans, path, i) {
                BUG_ON(path->sorted_idx >= trans->nr_sorted);
                BUG_ON(trans->sorted[path->sorted_idx] != i);
        }

        for (i = 0; i < trans->nr_sorted; i++) {
                unsigned idx = trans->sorted[i];

                BUG_ON(!test_bit(idx, trans->paths_allocated));
                BUG_ON(trans->paths[idx].sorted_idx != i);
        }
}

static void btree_trans_verify_sorted(struct btree_trans *trans)
{
        struct btree_path *path, *prev = NULL;
        struct trans_for_each_path_inorder_iter iter;

        if (!bch2_debug_check_iterators)
                return;

        trans_for_each_path_inorder(trans, path, iter) {
                if (prev && btree_path_cmp(prev, path) > 0) {
                        __bch2_dump_trans_paths_updates(trans, true);
                        panic("trans paths out of order!\n");
                }
                prev = path;
        }
}
#else
static inline void btree_trans_verify_sorted_refs(struct btree_trans *trans) {}
static inline void btree_trans_verify_sorted(struct btree_trans *trans) {}
#endif

void __bch2_btree_trans_sort_paths(struct btree_trans *trans)
{
        int i, l = 0, r = trans->nr_sorted, inc = 1;
        bool swapped;

        btree_trans_verify_sorted_refs(trans);

        if (trans->paths_sorted)
                goto out;

        /*
         * Cocktail shaker sort: this is efficient because iterators will be
         * mostly sorted.
         */
        do {
                swapped = false;

                for (i = inc > 0 ? l : r - 2;
                     i + 1 < r && i >= l;
                     i += inc) {
                        if (btree_path_cmp(trans->paths + trans->sorted[i],
                                           trans->paths + trans->sorted[i + 1]) > 0) {
                                swap(trans->sorted[i], trans->sorted[i + 1]);
                                trans->paths[trans->sorted[i]].sorted_idx = i;
                                trans->paths[trans->sorted[i + 1]].sorted_idx = i + 1;
                                swapped = true;
                        }
                }

                if (inc > 0)
                        --r;
                else
                        l++;
                inc = -inc;
        } while (swapped);

        trans->paths_sorted = true;
out:
        btree_trans_verify_sorted(trans);
}

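/*
 * Editor's note (illustrative sketch, not part of the upstream file):
 * cocktail shaker sort is bubble sort that alternates direction each pass,
 * shrinking the unsorted window from both ends.  On nearly-sorted input,
 * the common case for trans->sorted above, it finishes in very few passes.
 * A generic version over a plain int array, with an invented name:
 */
static void __maybe_unused example_cocktail_shaker_sort(int *a, int n)
{
        int i, l = 0, r = n, inc = 1;
        bool swapped;

        do {
                swapped = false;

                /* sweep forward on odd passes, backward on even passes */
                for (i = inc > 0 ? l : r - 2;
                     i + 1 < r && i >= l;
                     i += inc)
                        if (a[i] > a[i + 1]) {
                                swap(a[i], a[i + 1]);
                                swapped = true;
                        }

                if (inc > 0)
                        --r;    /* largest element has bubbled to the right end */
                else
                        l++;    /* smallest element has bubbled to the left end */
                inc = -inc;
        } while (swapped);
}
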
static inline void btree_path_list_remove(struct btree_trans *trans,
                                          struct btree_path *path)
{
        EBUG_ON(path->sorted_idx >= trans->nr_sorted);
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
        trans->nr_sorted--;
        memmove_u64s_down_small(trans->sorted + path->sorted_idx,
                                trans->sorted + path->sorted_idx + 1,
                                DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx,
                                             sizeof(u64) / sizeof(btree_path_idx_t)));
#else
        array_remove_item(trans->sorted, trans->nr_sorted, path->sorted_idx);
#endif
        for (unsigned i = path->sorted_idx; i < trans->nr_sorted; i++)
                trans->paths[trans->sorted[i]].sorted_idx = i;
}

static inline void btree_path_list_add(struct btree_trans *trans,
                                       btree_path_idx_t pos,
                                       btree_path_idx_t path_idx)
{
        struct btree_path *path = trans->paths + path_idx;

        path->sorted_idx = pos ? trans->paths[pos].sorted_idx + 1 : trans->nr_sorted;

#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
        memmove_u64s_up_small(trans->sorted + path->sorted_idx + 1,
                              trans->sorted + path->sorted_idx,
                              DIV_ROUND_UP(trans->nr_sorted - path->sorted_idx,
                                           sizeof(u64) / sizeof(btree_path_idx_t)));
        trans->nr_sorted++;
        trans->sorted[path->sorted_idx] = path_idx;
#else
        array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path_idx);
#endif

        for (unsigned i = path->sorted_idx; i < trans->nr_sorted; i++)
                trans->paths[trans->sorted[i]].sorted_idx = i;

        btree_trans_verify_sorted_refs(trans);
}

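/*
 * Editor's note (illustrative sketch, not part of the upstream file):
 * trans->sorted is a dense array of small path indices, and each path also
 * stores its own position in that array (sorted_idx), so every insert or
 * remove must shift the tail and then rewrite the back-pointers of the
 * shifted entries - which is what the two helpers above do.  The generic
 * shape of the insert, over invented "item"/"sorted" types:
 */
struct example_item { unsigned sorted_idx; };

static void __maybe_unused example_sorted_list_add(u8 *sorted, unsigned *nr,
                                                   struct example_item *items,
                                                   unsigned insert_pos, u8 item_idx)
{
        /* shift the tail up by one slot */
        memmove(sorted + insert_pos + 1,
                sorted + insert_pos,
                (*nr - insert_pos) * sizeof(*sorted));
        sorted[insert_pos] = item_idx;
        (*nr)++;

        /* every shifted entry (and the new one) needs its back-pointer fixed */
        for (unsigned i = insert_pos; i < *nr; i++)
                items[sorted[i]].sorted_idx = i;
}
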
void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
{
        if (iter->update_path)
                bch2_path_put_nokeep(trans, iter->update_path,
                              iter->flags & BTREE_ITER_intent);
        if (iter->path)
                bch2_path_put(trans, iter->path,
                              iter->flags & BTREE_ITER_intent);
        if (iter->key_cache_path)
                bch2_path_put(trans, iter->key_cache_path,
                              iter->flags & BTREE_ITER_intent);
        iter->path              = 0;
        iter->update_path       = 0;
        iter->key_cache_path    = 0;
        iter->trans             = NULL;
}

void bch2_trans_iter_init_outlined(struct btree_trans *trans,
                          struct btree_iter *iter,
                          enum btree_id btree_id, struct bpos pos,
                          unsigned flags)
{
        bch2_trans_iter_init_common(trans, iter, btree_id, pos, 0, 0,
                               bch2_btree_iter_flags(trans, btree_id, flags),
                               _RET_IP_);
}

void bch2_trans_node_iter_init(struct btree_trans *trans,
                               struct btree_iter *iter,
                               enum btree_id btree_id,
                               struct bpos pos,
                               unsigned locks_want,
                               unsigned depth,
                               unsigned flags)
{
        flags |= BTREE_ITER_not_extents;
        flags |= BTREE_ITER_snapshot_field;
        flags |= BTREE_ITER_all_snapshots;

        bch2_trans_iter_init_common(trans, iter, btree_id, pos, locks_want, depth,
                               __bch2_btree_iter_flags(trans, btree_id, flags),
                               _RET_IP_);

        iter->min_depth = depth;

        struct btree_path *path = btree_iter_path(trans, iter);
        BUG_ON(path->locks_want  < min(locks_want, BTREE_MAX_DEPTH));
        BUG_ON(path->level      != depth);
        BUG_ON(iter->min_depth  != depth);
}

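/*
 * Editor's note (illustrative sketch, not part of the upstream file): node
 * iterators walk whole btree nodes at a chosen level rather than individual
 * keys.  This sketch assumes bch2_btree_iter_peek_node() as declared in
 * btree_iter.h; the helper name and the "report the first node's min_key"
 * body are invented for illustration.
 */
static int __maybe_unused example_first_node_at_level(struct btree_trans *trans,
                                                      enum btree_id btree,
                                                      unsigned level,
                                                      struct bpos *min_key)
{
        struct btree_iter iter;
        struct btree *b;
        int ret;

        bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, level, 0);

        b = bch2_btree_iter_peek_node(&iter);
        ret = PTR_ERR_OR_ZERO(b);
        if (!ret && b)
                *min_key = b->data->min_key;

        bch2_trans_iter_exit(trans, &iter);
        return ret;
}
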
void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
{
        struct btree_trans *trans = src->trans;

        *dst = *src;
#ifdef TRACK_PATH_ALLOCATED
        dst->ip_allocated = _RET_IP_;
#endif
        if (src->path)
                __btree_path_get(trans, trans->paths + src->path, src->flags & BTREE_ITER_intent);
        if (src->update_path)
                __btree_path_get(trans, trans->paths + src->update_path, src->flags & BTREE_ITER_intent);
        dst->key_cache_path = 0;
}

void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
{
        struct bch_fs *c = trans->c;
        unsigned new_top = trans->mem_top + size;
        unsigned old_bytes = trans->mem_bytes;
        unsigned new_bytes = roundup_pow_of_two(new_top);
        int ret;
        void *new_mem;
        void *p;

        WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);

        struct btree_transaction_stats *s = btree_trans_stats(trans);
        s->max_mem = max(s->max_mem, new_bytes);

        if (trans->used_mempool) {
                if (trans->mem_bytes >= new_bytes)
                        goto out_change_top;

                /* No more space in the mempool buffer; fall back to kmalloc: */
                new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN);
                if (unlikely(!new_mem)) {
                        bch2_trans_unlock(trans);

                        new_mem = kmalloc(new_bytes, GFP_KERNEL);
                        if (!new_mem)
                                return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);

                        ret = bch2_trans_relock(trans);
                        if (ret) {
                                kfree(new_mem);
                                return ERR_PTR(ret);
                        }
                }
                memcpy(new_mem, trans->mem, trans->mem_top);
                trans->used_mempool = false;
                mempool_free(trans->mem, &c->btree_trans_mem_pool);
                goto out_new_mem;
        }

        new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
        if (unlikely(!new_mem)) {
                bch2_trans_unlock(trans);

                new_mem = krealloc(trans->mem, new_bytes, GFP_KERNEL);
                if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
                        new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
                        new_bytes = BTREE_TRANS_MEM_MAX;
                        memcpy(new_mem, trans->mem, trans->mem_top);
                        trans->used_mempool = true;
                        kfree(trans->mem);
                }

                if (!new_mem)
                        return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);

                trans->mem = new_mem;
                trans->mem_bytes = new_bytes;

                ret = bch2_trans_relock(trans);
                if (ret)
                        return ERR_PTR(ret);
        }
out_new_mem:
        trans->mem = new_mem;
        trans->mem_bytes = new_bytes;

        if (old_bytes) {
                trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
                return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
        }
out_change_top:
        p = trans->mem + trans->mem_top;
        trans->mem_top += size;
3022         memset(p, 0, size);                      3022         memset(p, 0, size);
3023         return p;                                3023         return p;
3024 }                                                3024 }
3025                                                  3025 
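/*
 * Illustrative sketch, not from the original source: a typical caller of the
 * bch2_trans_kmalloc() wrapper (declared in btree_iter.h), which falls back
 * to __bch2_trans_kmalloc() above when the transaction's bump allocator is
 * out of space.  The returned pointer must be checked with
 * PTR_ERR_OR_ZERO(); on failure it carries either
 * -BCH_ERR_ENOMEM_trans_kmalloc or a transaction restart, both of which the
 * caller should simply propagate.  example_alloc_key() is a hypothetical
 * helper used only for illustration.
 */
#if 0 /* example only */
static int example_alloc_key(struct btree_trans *trans)
{
	struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
	int ret = PTR_ERR_OR_ZERO(k);

	if (ret)	/* ENOMEM or transaction_restart_mem_realloced */
		return ret;

	bkey_init(&k->k);	/* the allocation is zeroed (see the memset() above) */
	return 0;
}
#endif
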
static inline void check_srcu_held_too_long(struct btree_trans *trans)
{
	WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
	     "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
	     (jiffies - trans->srcu_lock_time) / HZ);
}

void bch2_trans_srcu_unlock(struct btree_trans *trans)
{
	if (trans->srcu_held) {
		struct bch_fs *c = trans->c;
		struct btree_path *path;
		unsigned i;

		trans_for_each_path(trans, path, i)
			if (path->cached && !btree_node_locked(path, 0))
				path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);

		check_srcu_held_too_long(trans);
		srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
		trans->srcu_held = false;
	}
}

static void bch2_trans_srcu_lock(struct btree_trans *trans)
{
	if (!trans->srcu_held) {
		trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
		trans->srcu_lock_time	= jiffies;
		trans->srcu_held = true;
	}
}

/**
 * bch2_trans_begin() - reset a transaction after an interrupted attempt
 * @trans: transaction to reset
 *
 * Returns:	current restart counter, to be used with trans_was_restarted()
 *
 * While iterating over or updating nodes, an attempt to lock a btree node may
 * return BCH_ERR_transaction_restart when the trylock fails. When this occurs
 * bch2_trans_begin() should be called and the transaction retried.
 */
u32 bch2_trans_begin(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned i;
	u64 now;

	bch2_trans_reset_updates(trans);

	trans->restart_count++;
	trans->mem_top			= 0;
	trans->journal_entries		= NULL;

	trans_for_each_path(trans, path, i) {
		path->should_be_locked = false;

		/*
		 * If the transaction wasn't restarted, we're presuming to be
		 * doing something new: don't keep iterators except the ones
		 * that are in use - and except for the subvolumes btree:
		 */
		if (!trans->restarted && path->btree_id != BTREE_ID_subvolumes)
			path->preserve = false;

		/*
		 * XXX: we probably shouldn't be doing this if the transaction
		 * was restarted, but currently we still overflow transaction
		 * iterators if we do that
		 */
		if (!path->ref && !path->preserve)
			__bch2_path_free(trans, i);
		else
			path->preserve = false;
	}

	now = local_clock();

	if (!IS_ENABLED(CONFIG_BCACHEFS_NO_LATENCY_ACCT) &&
	    time_after64(now, trans->last_begin_time + 10))
		__bch2_time_stats_update(&btree_trans_stats(trans)->duration,
					 trans->last_begin_time, now);

	if (!trans->restarted &&
	    (need_resched() ||
	     time_after64(now, trans->last_begin_time + BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS))) {
		bch2_trans_unlock(trans);
		cond_resched();
		now = local_clock();
	}
	trans->last_begin_time = now;

	if (unlikely(trans->srcu_held &&
		     time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
		bch2_trans_srcu_unlock(trans);

	trans->last_begin_ip = _RET_IP_;

	trans_set_locked(trans);

	if (trans->restarted) {
		bch2_btree_path_traverse_all(trans);
		trans->notrace_relock_fail = false;
	}

	bch2_trans_verify_not_unlocked(trans);
	return trans->restart_count;
}

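/*
 * Illustrative sketch, not from the original source: the retry loop that
 * bch2_trans_begin() is designed for.  Work is repeated until it finishes
 * without a transaction restart; bch2_err_matches() is used so that any
 * BCH_ERR_transaction_restart_* subtype triggers another pass.  do_work()
 * stands in for the caller's transactional code.  In-tree callers usually
 * express this pattern through helpers such as lockrestart_do().
 */
#if 0 /* example only */
	int ret;

	do {
		bch2_trans_begin(trans);

		ret = do_work(trans);
	} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
#endif
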
const char *bch2_btree_transaction_fns[BCH_TRANSACTIONS_NR] = { "(unknown)" };

unsigned bch2_trans_get_fn_idx(const char *fn)
{
	for (unsigned i = 0; i < ARRAY_SIZE(bch2_btree_transaction_fns); i++)
		if (!bch2_btree_transaction_fns[i] ||
		    bch2_btree_transaction_fns[i] == fn) {
			bch2_btree_transaction_fns[i] = fn;
			return i;
		}

	pr_warn_once("BCH_TRANSACTIONS_NR not big enough!");
	return 0;
}

struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
	__acquires(&c->btree_trans_barrier)
{
	struct btree_trans *trans;

	if (IS_ENABLED(__KERNEL__)) {
		trans = this_cpu_xchg(c->btree_trans_bufs->trans, NULL);
		if (trans) {
			memset(trans, 0, offsetof(struct btree_trans, list));
			goto got_trans;
		}
	}

	trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
	memset(trans, 0, sizeof(*trans));

	seqmutex_lock(&c->btree_trans_lock);
	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
		struct btree_trans *pos;
		pid_t pid = current->pid;

		trans->locking_wait.task = current;

		list_for_each_entry(pos, &c->btree_trans_list, list) {
			struct task_struct *pos_task = READ_ONCE(pos->locking_wait.task);
			/*
			 * We'd much prefer to be stricter here and completely
			 * disallow multiple btree_trans in the same thread -
			 * but the data move path calls bch2_write when we
			 * already have a btree_trans initialized.
			 */
			BUG_ON(pos_task &&
			       pid == pos_task->pid &&
			       pos->locked);
		}
	}

	list_add(&trans->list, &c->btree_trans_list);
	seqmutex_unlock(&c->btree_trans_lock);
got_trans:
	trans->c		= c;
	trans->last_begin_time	= local_clock();
	trans->fn_idx		= fn_idx;
	trans->locking_wait.task = current;
	trans->journal_replay_not_finished =
		unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags)) &&
		atomic_inc_not_zero(&c->journal_keys.ref);
	trans->nr_paths		= ARRAY_SIZE(trans->_paths);
	trans->paths_allocated	= trans->_paths_allocated;
	trans->sorted		= trans->_sorted;
	trans->paths		= trans->_paths;
	trans->updates		= trans->_updates;

	*trans_paths_nr(trans->paths) = BTREE_ITER_INITIAL;

	trans->paths_allocated[0] = 1;

	static struct lock_class_key lockdep_key;
	lockdep_init_map(&trans->dep_map, "bcachefs_btree", &lockdep_key, 0);

	if (fn_idx < BCH_TRANSACTIONS_NR) {
		trans->fn = bch2_btree_transaction_fns[fn_idx];

		struct btree_transaction_stats *s = &c->btree_transaction_stats[fn_idx];

		if (s->max_mem) {
			unsigned expected_mem_bytes = roundup_pow_of_two(s->max_mem);

			trans->mem = kmalloc(expected_mem_bytes, GFP_KERNEL);
			if (likely(trans->mem))
				trans->mem_bytes = expected_mem_bytes;
		}

		trans->nr_paths_max = s->nr_max_paths;
		trans->journal_entries_size = s->journal_entries_size;
	}

	trans->srcu_idx		= srcu_read_lock(&c->btree_trans_barrier);
	trans->srcu_lock_time	= jiffies;
	trans->srcu_held	= true;
	trans_set_locked(trans);

	closure_init_stack_release(&trans->ref);
	return trans;
}

static void check_btree_paths_leaked(struct btree_trans *trans)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	struct bch_fs *c = trans->c;
	struct btree_path *path;
	unsigned i;

	trans_for_each_path(trans, path, i)
		if (path->ref)
			goto leaked;
	return;
leaked:
	bch_err(c, "btree paths leaked from %s!", trans->fn);
	trans_for_each_path(trans, path, i)
		if (path->ref)
			printk(KERN_ERR "  btree %s %pS\n",
			       bch2_btree_id_str(path->btree_id),
			       (void *) path->ip_allocated);
	/* Be noisy about this: */
	bch2_fatal_error(c);
#endif
}

void bch2_trans_put(struct btree_trans *trans)
	__releases(&c->btree_trans_barrier)
{
	struct bch_fs *c = trans->c;

	bch2_trans_unlock(trans);

	trans_for_each_update(trans, i)
		__btree_path_put(trans, trans->paths + i->path, true);
	trans->nr_updates	= 0;

	check_btree_paths_leaked(trans);

	if (trans->srcu_held) {
		check_srcu_held_too_long(trans);
		srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
	}

	if (unlikely(trans->journal_replay_not_finished))
		bch2_journal_keys_put(c);

	/*
	 * trans->ref protects trans->locking_wait.task and the btree_paths
	 * array; both are used by the cycle detector
	 */
	closure_return_sync(&trans->ref);
	trans->locking_wait.task = NULL;

	unsigned long *paths_allocated = trans->paths_allocated;
	trans->paths_allocated	= NULL;
	trans->paths		= NULL;

	if (paths_allocated != trans->_paths_allocated)
		kvfree_rcu_mightsleep(paths_allocated);

	if (trans->used_mempool)
		mempool_free(trans->mem, &c->btree_trans_mem_pool);
	else
		kfree(trans->mem);

	/* Userspace doesn't have a real percpu implementation: */
	if (IS_ENABLED(__KERNEL__))
		trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);

	if (trans) {
		seqmutex_lock(&c->btree_trans_lock);
		list_del(&trans->list);
		seqmutex_unlock(&c->btree_trans_lock);

		mempool_free(trans, &c->btree_trans_pool);
	}
}

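/*
 * Illustrative sketch, not from the original source: the get/put lifecycle
 * implemented by __bch2_trans_get() and bch2_trans_put() above.
 * bch2_trans_get() is the usual entry point; it supplies the fn index used
 * for the per-function statistics.  Any btree_iters taken inside must be
 * released (bch2_trans_iter_exit()) before the final put, or the debug-build
 * check_btree_paths_leaked() above will complain.  do_transactional_work()
 * is a hypothetical helper used only for illustration.
 */
#if 0 /* example only */
	struct btree_trans *trans = bch2_trans_get(c);
	int ret = do_transactional_work(trans);

	bch2_trans_put(trans);
	return ret;
#endif
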
bool bch2_current_has_btree_trans(struct bch_fs *c)
{
	seqmutex_lock(&c->btree_trans_lock);
	struct btree_trans *trans;
	bool ret = false;
	list_for_each_entry(trans, &c->btree_trans_list, list)
		if (trans->locking_wait.task == current &&
		    trans->locked) {
			ret = true;
			break;
		}
	seqmutex_unlock(&c->btree_trans_lock);
	return ret;
}

static void __maybe_unused
bch2_btree_bkey_cached_common_to_text(struct printbuf *out,
				      struct btree_bkey_cached_common *b)
{
	struct six_lock_count c = six_lock_counts(&b->lock);
	struct task_struct *owner;
	pid_t pid;

	rcu_read_lock();
	owner = READ_ONCE(b->lock.owner);
	pid = owner ? owner->pid : 0;
	rcu_read_unlock();

	prt_printf(out, "\t%px %c l=%u %s:", b, b->cached ? 'c' : 'b',
		   b->level, bch2_btree_id_str(b->btree_id));
	bch2_bpos_to_text(out, btree_node_pos(b));

	prt_printf(out, "\t locks %u:%u:%u held by pid %u",
		   c.n[0], c.n[1], c.n[2], pid);
}

void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
{
	struct btree_bkey_cached_common *b;
	static char lock_types[] = { 'r', 'i', 'w' };
	struct task_struct *task = READ_ONCE(trans->locking_wait.task);
	unsigned l, idx;

	/* before rcu_read_lock(): */
	bch2_printbuf_make_room(out, 4096);

	if (!out->nr_tabstops) {
		printbuf_tabstop_push(out, 16);
		printbuf_tabstop_push(out, 32);
	}

	prt_printf(out, "%i %s\n", task ? task->pid : 0, trans->fn);

	/* trans->paths is rcu protected vs. freeing */
	rcu_read_lock();
	out->atomic++;

	struct btree_path *paths = rcu_dereference(trans->paths);
	if (!paths)
		goto out;

	unsigned long *paths_allocated = trans_paths_allocated(paths);

	trans_for_each_path_idx_from(paths_allocated, *trans_paths_nr(paths), idx, 1) {
		struct btree_path *path = paths + idx;
		if (!path->nodes_locked)
			continue;

		prt_printf(out, "  path %u %c l=%u %s:",
		       idx,
		       path->cached ? 'c' : 'b',
		       path->level,
		       bch2_btree_id_str(path->btree_id));
		bch2_bpos_to_text(out, path->pos);
		prt_newline(out);

		for (l = 0; l < BTREE_MAX_DEPTH; l++) {
			if (btree_node_locked(path, l) &&
			    !IS_ERR_OR_NULL(b = (void *) READ_ONCE(path->l[l].b))) {
				prt_printf(out, "    %c l=%u ",
					   lock_types[btree_node_locked_type(path, l)], l);
				bch2_btree_bkey_cached_common_to_text(out, b);
				prt_newline(out);
			}
		}
	}

	b = READ_ONCE(trans->locking);
	if (b) {
		prt_printf(out, "  blocked for %lluus on\n",
			   div_u64(local_clock() - trans->locking_wait.start_time, 1000));
		prt_printf(out, "    %c", lock_types[trans->locking_wait.lock_want]);
		bch2_btree_bkey_cached_common_to_text(out, b);
		prt_newline(out);
	}
out:
	--out->atomic;
	rcu_read_unlock();
}

void bch2_fs_btree_iter_exit(struct bch_fs *c)
{
	struct btree_transaction_stats *s;
	struct btree_trans *trans;
	int cpu;

	if (c->btree_trans_bufs)
		for_each_possible_cpu(cpu) {
			struct btree_trans *trans =
				per_cpu_ptr(c->btree_trans_bufs, cpu)->trans;

			if (trans) {
				seqmutex_lock(&c->btree_trans_lock);
				list_del(&trans->list);
				seqmutex_unlock(&c->btree_trans_lock);
			}
			kfree(trans);
		}
	free_percpu(c->btree_trans_bufs);

	trans = list_first_entry_or_null(&c->btree_trans_list, struct btree_trans, list);
	if (trans)
		panic("%s leaked btree_trans\n", trans->fn);

	for (s = c->btree_transaction_stats;
	     s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
	     s++) {
		kfree(s->max_paths_text);
		bch2_time_stats_exit(&s->lock_hold_times);
	}

	if (c->btree_trans_barrier_initialized) {
		synchronize_srcu_expedited(&c->btree_trans_barrier);
		cleanup_srcu_struct(&c->btree_trans_barrier);
	}
	mempool_exit(&c->btree_trans_mem_pool);
	mempool_exit(&c->btree_trans_pool);
}

void bch2_fs_btree_iter_init_early(struct bch_fs *c)
{
	struct btree_transaction_stats *s;

	for (s = c->btree_transaction_stats;
	     s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats);
	     s++) {
		bch2_time_stats_init(&s->duration);
		bch2_time_stats_init(&s->lock_hold_times);
		mutex_init(&s->lock);
	}

	INIT_LIST_HEAD(&c->btree_trans_list);
	seqmutex_init(&c->btree_trans_lock);
}

int bch2_fs_btree_iter_init(struct bch_fs *c)
{
	int ret;

	c->btree_trans_bufs = alloc_percpu(struct btree_trans_buf);
	if (!c->btree_trans_bufs)
		return -ENOMEM;

	ret   = mempool_init_kmalloc_pool(&c->btree_trans_pool, 1,
					  sizeof(struct btree_trans)) ?:
		mempool_init_kmalloc_pool(&c->btree_trans_mem_pool, 1,
					  BTREE_TRANS_MEM_MAX) ?:
		init_srcu_struct(&c->btree_trans_barrier);
	if (ret)
		return ret;

	/*
	 * static annotation (hackily done) for lock ordering of reclaim vs.
	 * btree node locks:
	 */
#ifdef CONFIG_LOCKDEP
	fs_reclaim_acquire(GFP_KERNEL);
	struct btree_trans *trans = bch2_trans_get(c);
	trans_set_locked(trans);
	bch2_trans_put(trans);
	fs_reclaim_release(GFP_KERNEL);
#endif

	c->btree_trans_barrier_initialized = true;
	return 0;
}
