~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/bcachefs/btree_write_buffer.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /fs/bcachefs/btree_write_buffer.c (Version linux-6.12-rc7) and /fs/bcachefs/btree_write_buffer.c (Version linux-5.18.19)


  1 // SPDX-License-Identifier: GPL-2.0                 1 
  2                                                   
  3 #include "bcachefs.h"                             
  4 #include "bkey_buf.h"                             
  5 #include "btree_locking.h"                        
  6 #include "btree_update.h"                         
  7 #include "btree_update_interior.h"                
  8 #include "btree_write_buffer.h"                   
  9 #include "disk_accounting.h"                      
 10 #include "error.h"                                
 11 #include "extents.h"                              
 12 #include "journal.h"                              
 13 #include "journal_io.h"                           
 14 #include "journal_reclaim.h"                      
 15                                                   
 16 #include <linux/prefetch.h>                       
 17 #include <linux/sort.h>                           
 18                                                   
 19 static int bch2_btree_write_buffer_journal_flu    
 20                                 struct journal    
 21                                                   
 22 static int bch2_journal_keys_to_write_buffer(s    
 23                                                   
 24 static inline bool __wb_key_ref_cmp(const stru    
 25 {                                                 
 26         return (cmp_int(l->hi, r->hi) ?:          
 27                 cmp_int(l->mi, r->mi) ?:          
 28                 cmp_int(l->lo, r->lo)) >= 0;      
 29 }                                                 
 30                                                   
 31 static inline bool wb_key_ref_cmp(const struct    
 32 {                                                 
 33 #ifdef CONFIG_X86_64                              
 34         int cmp;                                  
 35                                                   
 36         asm("mov   (%[l]), %%rax;"                
 37             "sub   (%[r]), %%rax;"                
 38             "mov  8(%[l]), %%rax;"                
 39             "sbb  8(%[r]), %%rax;"                
 40             "mov 16(%[l]), %%rax;"                
 41             "sbb 16(%[r]), %%rax;"                
 42             : "=@ccae" (cmp)                      
 43             : [l] "r" (l), [r] "r" (r)            
 44             : "rax", "cc");                       
 45                                                   
 46         EBUG_ON(cmp != __wb_key_ref_cmp(l, r))    
 47         return cmp;                               
 48 #else                                             
 49         return __wb_key_ref_cmp(l, r);            
 50 #endif                                            
 51 }                                                 
 52                                                   
 53 static int wb_key_seq_cmp(const void *_l, cons    
 54 {                                                 
 55         const struct btree_write_buffered_key     
 56         const struct btree_write_buffered_key     
 57                                                   
 58         return cmp_int(l->journal_seq, r->jour    
 59 }                                                 
 60                                                   
 61 /* Compare excluding idx, the low 24 bits: */     
 62 static inline bool wb_key_eq(const void *_l, c    
 63 {                                                 
 64         const struct wb_key_ref *l = _l;          
 65         const struct wb_key_ref *r = _r;          
 66                                                   
 67         return !((l->hi ^ r->hi)|                 
 68                  (l->mi ^ r->mi)|                 
 69                  ((l->lo >> 24) ^ (r->lo >> 24    
 70 }                                                 
 71                                                   
 72 static noinline void wb_sort(struct wb_key_ref    
 73 {                                                 
 74         size_t n = num, a = num / 2;              
 75                                                   
 76         if (!a)         /* num < 2 || size ==     
 77                 return;                           
 78                                                   
 79         for (;;) {                                
 80                 size_t b, c, d;                   
 81                                                   
 82                 if (a)                  /* Bui    
 83                         --a;                      
 84                 else if (--n)           /* Sor    
 85                         swap(base[0], base[n])    
 86                 else                    /* Sor    
 87                         break;                    
 88                                                   
 89                 /*                                
 90                  * Sift element at "a" down in    
 91                  * "bottom-up" variant, which     
 92                  * calls to cmp_func(): we fin    
 93                  * the way to the leaves (one     
 94                  * backtrack to find where to     
 95                  *                                
 96                  * Because elements tend to si    
 97                  * this uses fewer compares th    
 98                  * on the way down.  (A bit mo    
 99                  * average, 3/4 worst-case.)      
100                  */                               
101                 for (b = a; c = 2*b + 1, (d =     
102                         b = wb_key_ref_cmp(bas    
103                 if (d == n)             /* Spe    
104                         b = c;                    
105                                                   
106                 /* Now backtrack from "b" to t    
107                 while (b != a && wb_key_ref_cm    
108                         b = (b - 1) / 2;          
109                 c = b;                  /* Whe    
110                 while (b != a) {        /* Shi    
111                         b = (b - 1) / 2;          
112                         swap(base[b], base[c])    
113                 }                                 
114         }                                         
115 }                                                 
116                                                   
117 static noinline int wb_flush_one_slowpath(stru    
118                                           stru    
119                                           stru    
120 {                                                 
121         struct btree_path *path = btree_iter_p    
122                                                   
123         bch2_btree_node_unlock_write(trans, pa    
124                                                   
125         trans->journal_res.seq = wb->journal_s    
126                                                   
127         return bch2_trans_update(trans, iter,     
128                                  BTREE_UPDATE_    
129                 bch2_trans_commit(trans, NULL,    
130                                   BCH_TRANS_CO    
131                                   BCH_TRANS_CO    
132                                   BCH_TRANS_CO    
133                                   BCH_TRANS_CO    
134 }                                                 
135                                                   
136 static inline int wb_flush_one(struct btree_tr    
137                                struct btree_wr    
138                                bool *write_loc    
139                                bool *accountin    
140                                size_t *fast)      
141 {                                                 
142         struct btree_path *path;                  
143         int ret;                                  
144                                                   
145         EBUG_ON(!wb->journal_seq);                
146         EBUG_ON(!trans->c->btree_write_buffer.    
147         EBUG_ON(trans->c->btree_write_buffer.f    
148                                                   
149         ret = bch2_btree_iter_traverse(iter);     
150         if (ret)                                  
151                 return ret;                       
152                                                   
153         if (!*accounting_accumulated && wb->k.    
154                 struct bkey u;                    
155                 struct bkey_s_c k = bch2_btree    
156                                                   
157                 if (k.k->type == KEY_TYPE_acco    
158                         bch2_accounting_accumu    
159                                                   
160         }                                         
161         *accounting_accumulated = true;           
162                                                   
163         /*                                        
164          * We can't clone a path that has writ    
165          * set_pos and traverse():                
166          */                                       
167         if (btree_iter_path(trans, iter)->ref     
168                 iter->path = __bch2_btree_path    
169                                                   
170         path = btree_iter_path(trans, iter);      
171                                                   
172         if (!*write_locked) {                     
173                 ret = bch2_btree_node_lock_wri    
174                 if (ret)                          
175                         return ret;               
176                                                   
177                 bch2_btree_node_prep_for_write    
178                 *write_locked = true;             
179         }                                         
180                                                   
181         if (unlikely(!bch2_btree_node_insert_f    
182                 *write_locked = false;            
183                 return wb_flush_one_slowpath(t    
184         }                                         
185                                                   
186         bch2_btree_insert_key_leaf(trans, path    
187         (*fast)++;                                
188         return 0;                                 
189 }                                                 
190                                                   
191 /*                                                
192  * Update a btree with a write buffered key us    
193  * original write buffer insert.                  
194  *                                                
195  * It is not safe to rejournal the key once it    
196  * buffer because that may break recovery orde    
197  * have already been modified in the active wr    
198  * before the current transaction. If we were     
199  * crash, recovery would process updates in th    
200  */                                               
201 static int                                        
202 btree_write_buffered_insert(struct btree_trans    
203                           struct btree_write_b    
204 {                                                 
205         struct btree_iter iter;                   
206         int ret;                                  
207                                                   
208         bch2_trans_iter_init(trans, &iter, wb-    
209                              BTREE_ITER_cached    
210                                                   
211         trans->journal_res.seq = wb->journal_s    
212                                                   
213         ret   = bch2_btree_iter_traverse(&iter    
214                 bch2_trans_update(trans, &iter    
215                                   BTREE_UPDATE    
216         bch2_trans_iter_exit(trans, &iter);       
217         return ret;                               
218 }                                                 
219                                                   
220 static void move_keys_from_inc_to_flushing(str    
221 {                                                 
222         struct bch_fs *c = container_of(wb, st    
223         struct journal *j = &c->journal;          
224                                                   
225         if (!wb->inc.keys.nr)                     
226                 return;                           
227                                                   
228         bch2_journal_pin_add(j, wb->inc.keys.d    
229                              bch2_btree_write_    
230                                                   
231         darray_resize(&wb->flushing.keys, min_    
232         darray_resize(&wb->sorted, wb->flushin    
233                                                   
234         if (!wb->flushing.keys.nr && wb->sorte    
235                 swap(wb->flushing.keys, wb->in    
236                 goto out;                         
237         }                                         
238                                                   
239         size_t nr = min(darray_room(wb->flushi    
240                         wb->sorted.size - wb->    
241         nr = min(nr, wb->inc.keys.nr);            
242                                                   
243         memcpy(&darray_top(wb->flushing.keys),    
244                wb->inc.keys.data,                 
245                sizeof(wb->inc.keys.data[0]) *     
246                                                   
247         memmove(wb->inc.keys.data,                
248                 wb->inc.keys.data + nr,           
249                sizeof(wb->inc.keys.data[0]) *     
250                                                   
251         wb->flushing.keys.nr    += nr;            
252         wb->inc.keys.nr         -= nr;            
253 out:                                              
254         if (!wb->inc.keys.nr)                     
255                 bch2_journal_pin_drop(j, &wb->    
256         else                                      
257                 bch2_journal_pin_update(j, wb-    
258                                         bch2_b    
259                                                   
260         if (j->watermark) {                       
261                 spin_lock(&j->lock);              
262                 bch2_journal_set_watermark(j);    
263                 spin_unlock(&j->lock);            
264         }                                         
265                                                   
266         BUG_ON(wb->sorted.size < wb->flushing.    
267 }                                                 
268                                                   
269 static int bch2_btree_write_buffer_flush_locke    
270 {                                                 
271         struct bch_fs *c = trans->c;              
272         struct journal *j = &c->journal;          
273         struct btree_write_buffer *wb = &c->bt    
274         struct btree_iter iter = { NULL };        
275         size_t overwritten = 0, fast = 0, slow    
276         bool write_locked = false;                
277         bool accounting_replay_done = test_bit    
278         int ret = 0;                              
279                                                   
280         bch2_trans_unlock(trans);                 
281         bch2_trans_begin(trans);                  
282                                                   
283         mutex_lock(&wb->inc.lock);                
284         move_keys_from_inc_to_flushing(wb);       
285         mutex_unlock(&wb->inc.lock);              
286                                                   
287         for (size_t i = 0; i < wb->flushing.ke    
288                 wb->sorted.data[i].idx = i;       
289                 wb->sorted.data[i].btree = wb-    
290                 memcpy(&wb->sorted.data[i].pos    
291         }                                         
292         wb->sorted.nr = wb->flushing.keys.nr;     
293                                                   
294         /*                                        
295          * We first sort so that we can detect    
296          * then we attempt to flush in sorted     
297          * efficient.                             
298          *                                        
299          * However, since we're not flushing i    
300          * journal we won't be able to drop ou    
301          * flushed - which means this could de    
302          * passing BCH_TRANS_COMMIT_journal_re    
303          * if it would block taking a journal     
304          *                                        
305          * If that happens, simply skip the ke    
306          * as many keys as possible in the fas    
307          */                                       
308         wb_sort(wb->sorted.data, wb->sorted.nr    
309                                                   
310         darray_for_each(wb->sorted, i) {          
311                 struct btree_write_buffered_ke    
312                                                   
313                 for (struct wb_key_ref *n = i     
314                         prefetch(&wb->flushing    
315                                                   
316                 BUG_ON(!k->journal_seq);          
317                                                   
318                 if (!accounting_replay_done &&    
319                     k->k.k.type == KEY_TYPE_ac    
320                         slowpath++;               
321                         continue;                 
322                 }                                 
323                                                   
324                 if (i + 1 < &darray_top(wb->so    
325                     wb_key_eq(i, i + 1)) {        
326                         struct btree_write_buf    
327                                                   
328                         if (k->k.k.type == KEY    
329                             n->k.k.type == KEY    
330                                 bch2_accountin    
331                                                   
332                                                   
333                         overwritten++;            
334                         n->journal_seq = min_t    
335                         k->journal_seq = 0;       
336                         continue;                 
337                 }                                 
338                                                   
339                 if (write_locked) {               
340                         struct btree_path *pat    
341                                                   
342                         if (path->btree_id !=     
343                             bpos_gt(k->k.k.p,     
344                                 bch2_btree_nod    
345                                 write_locked =    
346                                                   
347                                 ret = lockrest    
348                                         bch2_b    
349                                         bch2_f    
350                                                   
351                                                   
352                                                   
353                                                   
354                                 if (ret)          
355                                         goto e    
356                         }                         
357                 }                                 
358                                                   
359                 if (!iter.path || iter.btree_i    
360                         bch2_trans_iter_exit(t    
361                         bch2_trans_iter_init(t    
362                                              B    
363                 }                                 
364                                                   
365                 bch2_btree_iter_set_pos(&iter,    
366                 btree_iter_path(trans, &iter)-    
367                                                   
368                 bool accounting_accumulated =     
369                 do {                              
370                         if (race_fault()) {       
371                                 ret = -BCH_ERR    
372                                 break;            
373                         }                         
374                                                   
375                         ret = wb_flush_one(tra    
376                                            &ac    
377                         if (!write_locked)        
378                                 bch2_trans_beg    
379                 } while (bch2_err_matches(ret,    
380                                                   
381                 if (!ret) {                       
382                         k->journal_seq = 0;       
383                 } else if (ret == -BCH_ERR_jou    
384                         slowpath++;               
385                         ret = 0;                  
386                 } else                            
387                         break;                    
388         }                                         
389                                                   
390         if (write_locked) {                       
391                 struct btree_path *path = btre    
392                 bch2_btree_node_unlock_write(t    
393         }                                         
394         bch2_trans_iter_exit(trans, &iter);       
395                                                   
396         if (ret)                                  
397                 goto err;                         
398                                                   
399         if (slowpath) {                           
400                 /*                                
401                  * Flush in the order they wer    
402                  * we can release journal pins    
403                  * The fastpath zapped the seq    
404                  * we can skip those here.        
405                  */                               
406                 trace_and_count(c, write_buffe    
407                                                   
408                 sort(wb->flushing.keys.data,      
409                      wb->flushing.keys.nr,        
410                      sizeof(wb->flushing.keys.    
411                      wb_key_seq_cmp, NULL);       
412                                                   
413                 darray_for_each(wb->flushing.k    
414                         if (!i->journal_seq)      
415                                 continue;         
416                                                   
417                         if (!accounting_replay    
418                             i->k.k.type == KEY    
419                                 could_not_inse    
420                                 continue;         
421                         }                         
422                                                   
423                         if (!could_not_insert)    
424                                 bch2_journal_p    
425                                                   
426                                                   
427                         bch2_trans_begin(trans    
428                                                   
429                         ret = commit_do(trans,    
430                                         BCH_WA    
431                                         BCH_TR    
432                                         BCH_TR    
433                                         BCH_TR    
434                                         BCH_TR    
435                                         btree_    
436                         if (ret)                  
437                                 goto err;         
438                                                   
439                         i->journal_seq = 0;       
440                 }                                 
441                                                   
442                 /*                                
443                  * If journal replay hasn't fi    
444                  * can't flush accounting keys    
445                  * them for next time.            
446                  *                                
447                  * Q: Can the write buffer ove    
448                  * A: Shouldn't be any actual r    
449                  * updates that the write buff    
450                  * going to be generated by in    
451                  * journal replay has to split    
452                  * its updates.                   
453                  *                                
454                  * And for those new accounting    
455                  * counters get accumulated as    
456                  * to the write buffer - see t    
457                  * accumulated. So we could on    
458                  * distinct counters touched s    
459                  */                               
460                 if (could_not_insert) {           
461                         struct btree_write_buf    
462                                                   
463                         darray_for_each(wb->fl    
464                                 if (i->journal    
465                                         *dst++    
466                         wb->flushing.keys.nr =    
467                 }                                 
468         }                                         
469 err:                                              
470         if (ret || !could_not_insert) {           
471                 bch2_journal_pin_drop(j, &wb->    
472                 wb->flushing.keys.nr = 0;         
473         }                                         
474                                                   
475         bch2_fs_fatal_err_on(ret, c, "%s", bch    
476         trace_write_buffer_flush(trans, wb->fl    
477         return ret;                               
478 }                                                 
479                                                   
480 static int fetch_wb_keys_from_journal(struct b    
481 {                                                 
482         struct journal *j = &c->journal;          
483         struct journal_buf *buf;                  
484         int ret = 0;                              
485                                                   
486         while (!ret && (buf = bch2_next_write_    
487                 ret = bch2_journal_keys_to_wri    
488                 mutex_unlock(&j->buf_lock);       
489         }                                         
490                                                   
491         return ret;                               
492 }                                                 
493                                                   
494 static int btree_write_buffer_flush_seq(struct    
495 {                                                 
496         struct bch_fs *c = trans->c;              
497         struct btree_write_buffer *wb = &c->bt    
498         int ret = 0, fetch_from_journal_err;      
499                                                   
500         do {                                      
501                 bch2_trans_unlock(trans);         
502                                                   
503                 fetch_from_journal_err = fetch    
504                                                   
505                 /*                                
506                  * On memory allocation failur    
507                  * is not guaranteed to empty     
508                  */                               
509                 mutex_lock(&wb->flushing.lock)    
510                 ret = bch2_btree_write_buffer_    
511                 mutex_unlock(&wb->flushing.loc    
512         } while (!ret &&                          
513                  (fetch_from_journal_err ||       
514                   (wb->inc.pin.seq && wb->inc.    
515                   (wb->flushing.pin.seq && wb-    
516                                                   
517         return ret;                               
518 }                                                 
519                                                   
520 static int bch2_btree_write_buffer_journal_flu    
521                                 struct journal    
522 {                                                 
523         struct bch_fs *c = container_of(j, str    
524                                                   
525         return bch2_trans_run(c, btree_write_b    
526 }                                                 
527                                                   
528 int bch2_btree_write_buffer_flush_sync(struct     
529 {                                                 
530         struct bch_fs *c = trans->c;              
531                                                   
532         trace_and_count(c, write_buffer_flush_    
533                                                   
534         return btree_write_buffer_flush_seq(tr    
535 }                                                 
536                                                   
537 int bch2_btree_write_buffer_flush_nocheck_rw(s    
538 {                                                 
539         struct bch_fs *c = trans->c;              
540         struct btree_write_buffer *wb = &c->bt    
541         int ret = 0;                              
542                                                   
543         if (mutex_trylock(&wb->flushing.lock))    
544                 ret = bch2_btree_write_buffer_    
545                 mutex_unlock(&wb->flushing.loc    
546         }                                         
547                                                   
548         return ret;                               
549 }                                                 
550                                                   
551 int bch2_btree_write_buffer_tryflush(struct bt    
552 {                                                 
553         struct bch_fs *c = trans->c;              
554                                                   
555         if (!bch2_write_ref_tryget(c, BCH_WRIT    
556                 return -BCH_ERR_erofs_no_write    
557                                                   
558         int ret = bch2_btree_write_buffer_flus    
559         bch2_write_ref_put(c, BCH_WRITE_REF_bt    
560         return ret;                               
561 }                                                 
562                                                   
563 /*                                                
564  * In check and repair code, when checking ref    
565  * need to issue a flush before we have a defi    
566  * if this is a key we haven't yet checked.       
567  */                                               
568 int bch2_btree_write_buffer_maybe_flush(struct    
569                                         struct    
570                                         struct    
571 {                                                 
572         struct bch_fs *c = trans->c;              
573         struct bkey_buf tmp;                      
574         int ret = 0;                              
575                                                   
576         bch2_bkey_buf_init(&tmp);                 
577                                                   
578         if (!bkey_and_val_eq(referring_k, bkey    
579                 bch2_bkey_buf_reassemble(&tmp,    
580                                                   
581                 if (bkey_is_btree_ptr(referrin    
582                         bch2_trans_unlock(tran    
583                         bch2_btree_interior_up    
584                 }                                 
585                                                   
586                 ret = bch2_btree_write_buffer_    
587                 if (ret)                          
588                         goto err;                 
589                                                   
590                 bch2_bkey_buf_copy(last_flushe    
591                 ret = -BCH_ERR_transaction_res    
592         }                                         
593 err:                                              
594         bch2_bkey_buf_exit(&tmp, c);              
595         return ret;                               
596 }                                                 
597                                                   
598 static void bch2_btree_write_buffer_flush_work    
599 {                                                 
600         struct bch_fs *c = container_of(work,     
601         struct btree_write_buffer *wb = &c->bt    
602         int ret;                                  
603                                                   
604         mutex_lock(&wb->flushing.lock);           
605         do {                                      
606                 ret = bch2_trans_run(c, bch2_b    
607         } while (!ret && bch2_btree_write_buff    
608         mutex_unlock(&wb->flushing.lock);         
609                                                   
610         bch2_write_ref_put(c, BCH_WRITE_REF_bt    
611 }                                                 
612                                                   
613 static void wb_accounting_sort(struct btree_wr    
614 {                                                 
615         eytzinger0_sort(wb->accounting.data, w    
616                         sizeof(wb->accounting.    
617                         wb_key_cmp, NULL);        
618 }                                                 
619                                                   
620 int bch2_accounting_key_to_wb_slowpath(struct     
621                                        struct     
622 {                                                 
623         struct btree_write_buffer *wb = &c->bt    
624         struct btree_write_buffered_key new =     
625                                                   
626         bkey_copy(&new.k, &k->k_i);               
627                                                   
628         int ret = darray_push(&wb->accounting,    
629         if (ret)                                  
630                 return ret;                       
631                                                   
632         wb_accounting_sort(wb);                   
633         return 0;                                 
634 }                                                 
635                                                   
636 int bch2_journal_key_to_wb_slowpath(struct bch    
637                              struct journal_ke    
638                              enum btree_id btr    
639 {                                                 
640         struct btree_write_buffer *wb = &c->bt    
641         int ret;                                  
642 retry:                                            
643         ret = darray_make_room_gfp(&dst->wb->k    
644         if (!ret && dst->wb == &wb->flushing)     
645                 ret = darray_resize(&wb->sorte    
646                                                   
647         if (unlikely(ret)) {                      
648                 if (dst->wb == &c->btree_write    
649                         mutex_unlock(&dst->wb-    
650                         dst->wb = &c->btree_wr    
651                         bch2_journal_pin_add(&    
652                                              b    
653                         goto retry;               
654                 }                                 
655                                                   
656                 return ret;                       
657         }                                         
658                                                   
659         dst->room = darray_room(dst->wb->keys)    
660         if (dst->wb == &wb->flushing)             
661                 dst->room = min(dst->room, wb-    
662         BUG_ON(!dst->room);                       
663         BUG_ON(!dst->seq);                        
664                                                   
665         struct btree_write_buffered_key *wb_k     
666         wb_k->journal_seq       = dst->seq;       
667         wb_k->btree             = btree;          
668         bkey_copy(&wb_k->k, k);                   
669         dst->wb->keys.nr++;                       
670         dst->room--;                              
671         return 0;                                 
672 }                                                 
673                                                   
674 void bch2_journal_keys_to_write_buffer_start(s    
675 {                                                 
676         struct btree_write_buffer *wb = &c->bt    
677                                                   
678         if (mutex_trylock(&wb->flushing.lock))    
679                 mutex_lock(&wb->inc.lock);        
680                 move_keys_from_inc_to_flushing    
681                                                   
682                 /*                                
683                  * Attempt to skip wb->inc, an    
684                  * wb->flushing, saving us a c    
685                  */                               
686                                                   
687                 if (!wb->inc.keys.nr) {           
688                         dst->wb = &wb->flushin    
689                 } else {                          
690                         mutex_unlock(&wb->flus    
691                         dst->wb = &wb->inc;       
692                 }                                 
693         } else {                                  
694                 mutex_lock(&wb->inc.lock);        
695                 dst->wb = &wb->inc;               
696         }                                         
697                                                   
698         dst->room = darray_room(dst->wb->keys)    
699         if (dst->wb == &wb->flushing)             
700                 dst->room = min(dst->room, wb-    
701         dst->seq = seq;                           
702                                                   
703         bch2_journal_pin_add(&c->journal, seq,    
704                              bch2_btree_write_    
705                                                   
706         darray_for_each(wb->accounting, i)        
707                 memset(&i->k.v, 0, bkey_val_by    
708 }                                                 
709                                                   
710 int bch2_journal_keys_to_write_buffer_end(stru    
711 {                                                 
712         struct btree_write_buffer *wb = &c->bt    
713         unsigned live_accounting_keys = 0;        
714         int ret = 0;                              
715                                                   
716         darray_for_each(wb->accounting, i)        
717                 if (!bch2_accounting_key_is_ze    
718                         i->journal_seq = dst->    
719                         live_accounting_keys++    
720                         ret = __bch2_journal_k    
721                         if (ret)                  
722                                 break;            
723                 }                                 
724                                                   
725         if (live_accounting_keys * 2 < wb->acc    
726                 struct btree_write_buffered_ke    
727                                                   
728                 darray_for_each(wb->accounting    
729                         if (!bch2_accounting_k    
730                                 *dst++ = *src;    
731                 wb->accounting.nr = dst - wb->    
732                 wb_accounting_sort(wb);           
733         }                                         
734                                                   
735         if (!dst->wb->keys.nr)                    
736                 bch2_journal_pin_drop(&c->jour    
737                                                   
738         if (bch2_btree_write_buffer_should_flu    
739             __bch2_write_ref_tryget(c, BCH_WRI    
740             !queue_work(system_unbound_wq, &c-    
741                 bch2_write_ref_put(c, BCH_WRIT    
742                                                   
743         if (dst->wb == &wb->flushing)             
744                 mutex_unlock(&wb->flushing.loc    
745         mutex_unlock(&wb->inc.lock);              
746                                                   
747         return ret;                               
748 }                                                 
749                                                   
750 static int bch2_journal_keys_to_write_buffer(s    
751 {                                                 
752         struct journal_keys_to_wb dst;            
753         int ret = 0;                              
754                                                   
755         bch2_journal_keys_to_write_buffer_star    
756                                                   
757         for_each_jset_entry_type(entry, buf->d    
758                 jset_entry_for_each_key(entry,    
759                         ret = bch2_journal_key    
760                         if (ret)                  
761                                 goto out;         
762                 }                                 
763                                                   
764                 entry->type = BCH_JSET_ENTRY_b    
765         }                                         
766                                                   
767         spin_lock(&c->journal.lock);              
768         buf->need_flush_to_write_buffer = fals    
769         spin_unlock(&c->journal.lock);            
770 out:                                              
771         ret = bch2_journal_keys_to_write_buffe    
772         return ret;                               
773 }                                                 
774                                                   
775 static int wb_keys_resize(struct btree_write_b    
776 {                                                 
777         if (wb->keys.size >= new_size)            
778                 return 0;                         
779                                                   
780         if (!mutex_trylock(&wb->lock))            
781                 return -EINTR;                    
782                                                   
783         int ret = darray_resize(&wb->keys, new    
784         mutex_unlock(&wb->lock);                  
785         return ret;                               
786 }                                                 
787                                                   
788 int bch2_btree_write_buffer_resize(struct bch_    
789 {                                                 
790         struct btree_write_buffer *wb = &c->bt    
791                                                   
792         return wb_keys_resize(&wb->flushing, n    
793                 wb_keys_resize(&wb->inc, new_s    
794 }                                                 
795                                                   
796 void bch2_fs_btree_write_buffer_exit(struct bc    
797 {                                                 
798         struct btree_write_buffer *wb = &c->bt    
799                                                   
800         BUG_ON((wb->inc.keys.nr || wb->flushin    
801                !bch2_journal_error(&c->journal    
802                                                   
803         darray_exit(&wb->accounting);             
804         darray_exit(&wb->sorted);                 
805         darray_exit(&wb->flushing.keys);          
806         darray_exit(&wb->inc.keys);               
807 }                                                 
808                                                   
809 int bch2_fs_btree_write_buffer_init(struct bch    
810 {                                                 
811         struct btree_write_buffer *wb = &c->bt    
812                                                   
813         mutex_init(&wb->inc.lock);                
814         mutex_init(&wb->flushing.lock);           
815         INIT_WORK(&wb->flush_work, bch2_btree_    
816                                                   
817         /* Will be resized by journal as neede    
818         unsigned initial_size = 1 << 16;          
819                                                   
820         return  darray_make_room(&wb->inc.keys    
821                 darray_make_room(&wb->flushing    
822                 darray_make_room(&wb->sorted,     
823 }                                                 
824                                                   

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php