// SPDX-License-Identifier: GPL-2.0

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include "messages.h"
#include "ctree.h"
#include "extent_map.h"
#include "compression.h"
#include "btrfs_inode.h"
#include "disk-io.h"


static struct kmem_cache *extent_map_cache;

int __init extent_map_init(void)
{
        extent_map_cache = kmem_cache_create("btrfs_extent_map",
                                             sizeof(struct extent_map), 0, 0, NULL);
        if (!extent_map_cache)
                return -ENOMEM;
        return 0;
}

void __cold extent_map_exit(void)
{
        kmem_cache_destroy(extent_map_cache);
}

/*
 * Initialize the extent tree @tree.  Should be called for each new inode or
 * other user of the extent_map interface.
 */
void extent_map_tree_init(struct extent_map_tree *tree)
{
        tree->root = RB_ROOT;
        INIT_LIST_HEAD(&tree->modified_extents);
        rwlock_init(&tree->lock);
}

/*
 * Allocate a new extent_map structure.  The new structure is returned with a
 * reference count of one and needs to be freed using free_extent_map().
 */
struct extent_map *alloc_extent_map(void)
{
        struct extent_map *em;

        em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
        if (!em)
                return NULL;
        RB_CLEAR_NODE(&em->rb_node);
        refcount_set(&em->refs, 1);
        INIT_LIST_HEAD(&em->list);
        return em;
}

/*
 * Drop the reference count on @em by one and free the structure if the count
 * hits zero.
 */
void free_extent_map(struct extent_map *em)
{
        if (!em)
                return;
        if (refcount_dec_and_test(&em->refs)) {
                WARN_ON(extent_map_in_tree(em));
                WARN_ON(!list_empty(&em->list));
                kmem_cache_free(extent_map_cache, em);
        }
}
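
/*
 * Editor's note: an illustrative sketch (hypothetical caller, kept out of
 * the build) of the reference counting contract above: alloc_extent_map()
 * returns an em with a reference count of one, every additional user takes
 * its own reference, and each free_extent_map() call drops one reference,
 * freeing the structure only when the count reaches zero.
 */
#if 0
static void extent_map_refcount_example(void)
{
        struct extent_map *em = alloc_extent_map();     /* refs == 1 */

        if (!em)
                return;
        refcount_inc(&em->refs);        /* refs == 2, e.g. held by a tree */
        free_extent_map(em);            /* refs == 1, still alive */
        free_extent_map(em);            /* refs == 0, freed */
}
#endif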

/* Do the math around the end of an extent, handling wrapping. */
static u64 range_end(u64 start, u64 len)
{
        if (start + len < start)
                return (u64)-1;
        return start + len;
}
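
/*
 * Editor's note: a worked example of the clamping above. For start ==
 * 0xfffffffffffff000 and len == 0x2000, start + len wraps around to 0xfff,
 * which is smaller than start, so range_end() returns (u64)-1 (the maximum
 * offset) instead of the wrapped value.
 */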

static void dec_evictable_extent_maps(struct btrfs_inode *inode)
{
        struct btrfs_fs_info *fs_info = inode->root->fs_info;

        if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(inode->root)))
                percpu_counter_dec(&fs_info->evictable_extent_maps);
}

static int tree_insert(struct rb_root *root, struct extent_map *em)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct extent_map *entry = NULL;
        struct rb_node *orig_parent = NULL;
        u64 end = range_end(em->start, em->len);

        while (*p) {
                parent = *p;
                entry = rb_entry(parent, struct extent_map, rb_node);

                if (em->start < entry->start)
                        p = &(*p)->rb_left;
                else if (em->start >= extent_map_end(entry))
                        p = &(*p)->rb_right;
                else
                        return -EEXIST;
        }

        orig_parent = parent;
        while (parent && em->start >= extent_map_end(entry)) {
                parent = rb_next(parent);
                entry = rb_entry(parent, struct extent_map, rb_node);
        }
        if (parent)
                if (end > entry->start && em->start < extent_map_end(entry))
                        return -EEXIST;

        parent = orig_parent;
        entry = rb_entry(parent, struct extent_map, rb_node);
        while (parent && em->start < entry->start) {
                parent = rb_prev(parent);
                entry = rb_entry(parent, struct extent_map, rb_node);
        }
        if (parent)
                if (end > entry->start && em->start < extent_map_end(entry))
                        return -EEXIST;

        rb_link_node(&em->rb_node, orig_parent, p);
        rb_insert_color(&em->rb_node, root);
        return 0;
}
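
/*
 * Editor's note: a worked example of the overlap checks above. Suppose the
 * tree holds [0, 4K) and [8K, 12K), with [0, 4K) at the root, and we insert
 * em = [6K, 10K). The descent goes right at [0, 4K), then left at [8K, 12K),
 * and falls out at a NULL child without having seen the overlap. The
 * follow-up neighbor scans then catch it: for entry [8K, 12K) we have
 * end (10K) > entry->start (8K) and em->start (6K) < extent_map_end(entry)
 * (12K), so -EEXIST is returned.
 */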

/*
 * Search through the tree for an extent_map with a given offset.  If it can't
 * be found, try to find some neighboring extents.
 */
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
                                     struct rb_node **prev_or_next_ret)
{
        struct rb_node *n = root->rb_node;
        struct rb_node *prev = NULL;
        struct rb_node *orig_prev = NULL;
        struct extent_map *entry;
        struct extent_map *prev_entry = NULL;

        ASSERT(prev_or_next_ret);

        while (n) {
                entry = rb_entry(n, struct extent_map, rb_node);
                prev = n;
                prev_entry = entry;

                if (offset < entry->start)
                        n = n->rb_left;
                else if (offset >= extent_map_end(entry))
                        n = n->rb_right;
                else
                        return n;
        }

        orig_prev = prev;
        while (prev && offset >= extent_map_end(prev_entry)) {
                prev = rb_next(prev);
                prev_entry = rb_entry(prev, struct extent_map, rb_node);
        }

        /*
         * A previous extent map was found, return it through
         * @prev_or_next_ret, as in this case the caller does not care about
         * the next one.
         */
        if (prev) {
                *prev_or_next_ret = prev;
                return NULL;
        }

        prev = orig_prev;
        prev_entry = rb_entry(prev, struct extent_map, rb_node);
        while (prev && offset < prev_entry->start) {
                prev = rb_prev(prev);
                prev_entry = rb_entry(prev, struct extent_map, rb_node);
        }
        *prev_or_next_ret = prev;

        return NULL;
}
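
/*
 * Editor's note: the contract above, illustrated. With [0, 4K) and [8K, 12K)
 * in the tree, __tree_search(root, 2K, ...) returns the node for [0, 4K)
 * directly. __tree_search(root, 6K, ...) returns NULL and stores the node
 * for [8K, 12K) in *prev_or_next_ret (the first extent map ending beyond
 * the offset), which is what __lookup_extent_mapping() relies on for its
 * non-strict lookups.
 */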

static inline u64 extent_map_block_len(const struct extent_map *em)
{
        if (extent_map_is_compressed(em))
                return em->disk_num_bytes;
        return em->len;
}

static inline u64 extent_map_block_end(const struct extent_map *em)
{
        if (extent_map_block_start(em) + extent_map_block_len(em) <
            extent_map_block_start(em))
                return (u64)-1;
        return extent_map_block_start(em) + extent_map_block_len(em);
}

static bool can_merge_extent_map(const struct extent_map *em)
{
        if (em->flags & EXTENT_FLAG_PINNED)
                return false;

        /* Don't merge compressed extents, we need to know their actual size. */
        if (extent_map_is_compressed(em))
                return false;

        if (em->flags & EXTENT_FLAG_LOGGING)
                return false;

        /*
         * We don't want to merge stuff that hasn't been written to the log yet
         * since it may not reflect exactly what is on disk, and that would be
         * bad.
         */
        if (!list_empty(&em->list))
                return false;

        return true;
}

/* Check to see if two extent_map structs are adjacent and safe to merge. */
static bool mergeable_maps(const struct extent_map *prev, const struct extent_map *next)
{
        if (extent_map_end(prev) != next->start)
                return false;

        if (prev->flags != next->flags)
                return false;

        if (next->disk_bytenr < EXTENT_MAP_LAST_BYTE - 1)
                return extent_map_block_start(next) == extent_map_block_end(prev);

        /* HOLES and INLINE extents. */
        return next->disk_bytenr == prev->disk_bytenr;
}
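
/*
 * Editor's note: a concrete reading of mergeable_maps(). Two maps [0, 4K)
 * and [4K, 8K) with identical flags are mergeable only if their disk
 * locations line up as well: for regular extents, the block start of @next
 * must equal the block end of @prev; for holes and inline extents it is
 * enough that both carry the same special disk_bytenr value.
 */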

/*
 * Handle the on-disk data extents merge for @prev and @next.
 *
 * Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes.
 * For now only uncompressed regular extents can be merged.
 *
 * @prev and @next will both be updated to point to the new merged range.
 * Thus one of them should be removed by the caller.
 */
static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next)
{
        u64 new_disk_bytenr;
        u64 new_disk_num_bytes;
        u64 new_offset;

        /* @prev and @next should not be compressed. */
        ASSERT(!extent_map_is_compressed(prev));
        ASSERT(!extent_map_is_compressed(next));

        /*
         * There are two different cases where @prev and @next can be merged.
         *
         * 1) They are referring to the same data extent:
         *
         * |<----- data extent A ----->|
         *    |<- prev ->|<- next ->|
         *
         * 2) They are referring to different data extents but still adjacent:
         *
         * |<-- data extent A -->|<-- data extent B -->|
         *            |<- prev ->|<- next ->|
         *
         * The calculation here always merges the data extents first, then updates
         * @offset using the new data extents.
         *
         * For case 1), the merged data extent would be the same.
         * For case 2), we just merge the two data extents into one.
         */
        new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr);
        new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes,
                                 next->disk_bytenr + next->disk_num_bytes) -
                             new_disk_bytenr;
        new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr;

        prev->disk_bytenr = new_disk_bytenr;
        prev->disk_num_bytes = new_disk_num_bytes;
        prev->ram_bytes = new_disk_num_bytes;
        prev->offset = new_offset;

        next->disk_bytenr = new_disk_bytenr;
        next->disk_num_bytes = new_disk_num_bytes;
        next->ram_bytes = new_disk_num_bytes;
        next->offset = new_offset;
}
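
/*
 * Editor's note: numbers for case 2) above. With prev->disk_bytenr == 1M,
 * prev->disk_num_bytes == 4K, prev->offset == 0, next->disk_bytenr == 1M + 4K
 * and next->disk_num_bytes == 4K, we get new_disk_bytenr = min(1M, 1M + 4K)
 * = 1M, new_disk_num_bytes = max(1M + 4K, 1M + 8K) - 1M = 8K and new_offset
 * = 1M + 0 - 1M = 0, i.e. both maps now describe a single 8K data extent
 * starting at bytenr 1M.
 */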

static void dump_extent_map(struct btrfs_fs_info *fs_info, const char *prefix,
                            struct extent_map *em)
{
        if (!IS_ENABLED(CONFIG_BTRFS_DEBUG))
                return;
        btrfs_crit(fs_info,
"%s, start=%llu len=%llu disk_bytenr=%llu disk_num_bytes=%llu ram_bytes=%llu offset=%llu flags=0x%x",
                prefix, em->start, em->len, em->disk_bytenr, em->disk_num_bytes,
                em->ram_bytes, em->offset, em->flags);
        ASSERT(0);
}

/* Internal sanity checks for btrfs debug builds. */
static void validate_extent_map(struct btrfs_fs_info *fs_info, struct extent_map *em)
{
        if (!IS_ENABLED(CONFIG_BTRFS_DEBUG))
                return;
        if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
                if (em->disk_num_bytes == 0)
                        dump_extent_map(fs_info, "zero disk_num_bytes", em);
                if (em->offset + em->len > em->ram_bytes)
                        dump_extent_map(fs_info, "ram_bytes too small", em);
                if (em->offset + em->len > em->disk_num_bytes &&
                    !extent_map_is_compressed(em))
                        dump_extent_map(fs_info, "disk_num_bytes too small", em);
                if (!extent_map_is_compressed(em) &&
                    em->ram_bytes != em->disk_num_bytes)
                        dump_extent_map(fs_info,
                "ram_bytes mismatch with disk_num_bytes for non-compressed em",
                                        em);
        } else if (em->offset) {
                dump_extent_map(fs_info, "non-zero offset for hole/inline", em);
        }
}

static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
{
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct extent_map_tree *tree = &inode->extent_tree;
        struct extent_map *merge = NULL;
        struct rb_node *rb;

        /*
         * We can't modify an extent map that is in the tree and that is being
         * used by another task, as it can cause that other task to see it in
         * inconsistent state during the merging. We always have 1 reference for
         * the tree and 1 for this task (which is unpinning the extent map or
         * clearing the logging flag), so anything > 2 means it's being used by
         * other tasks too.
         */
        if (refcount_read(&em->refs) > 2)
                return;

        if (!can_merge_extent_map(em))
                return;

        if (em->start != 0) {
                rb = rb_prev(&em->rb_node);
                if (rb)
                        merge = rb_entry(rb, struct extent_map, rb_node);
                if (rb && can_merge_extent_map(merge) && mergeable_maps(merge, em)) {
                        em->start = merge->start;
                        em->len += merge->len;
                        em->generation = max(em->generation, merge->generation);

                        if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
                                merge_ondisk_extents(merge, em);
                        em->flags |= EXTENT_FLAG_MERGED;

                        validate_extent_map(fs_info, em);
                        rb_erase(&merge->rb_node, &tree->root);
                        RB_CLEAR_NODE(&merge->rb_node);
                        free_extent_map(merge);
                        dec_evictable_extent_maps(inode);
                }
        }

        rb = rb_next(&em->rb_node);
        if (rb)
                merge = rb_entry(rb, struct extent_map, rb_node);
        if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
                em->len += merge->len;
                if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
                        merge_ondisk_extents(em, merge);
                validate_extent_map(fs_info, em);
                rb_erase(&merge->rb_node, &tree->root);
                RB_CLEAR_NODE(&merge->rb_node);
                em->generation = max(em->generation, merge->generation);
                em->flags |= EXTENT_FLAG_MERGED;
                free_extent_map(merge);
                dec_evictable_extent_maps(inode);
        }
}
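
/*
 * Editor's note: the refcount threshold above in practice. While an extent
 * map sits in the tree it holds one reference, and the unpinning or
 * logging-clearing task holds a second one, so refs == 2 means nobody else
 * can observe the merge. A concurrent lookup_extent_mapping() would have
 * bumped refs to 3, and try_merge_map() then backs off instead of mutating
 * a map another task may be reading.
 */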

/*
 * Unpin an extent from the cache.
 *
 * @inode:      the inode from which we are unpinning an extent range
 * @start:      logical offset in the file
 * @len:        length of the extent
 * @gen:        generation that this extent has been modified in
 *
 * Called after an extent has been written to disk properly.  Set the generation
 * to the generation that actually added the file item to the inode so we know
 * we need to sync this extent when we call fsync().
 *
 * Returns: 0        on success
 *          -ENOENT  when the extent is not found in the tree
 *          -EUCLEAN if the found extent does not match the expected start
 */
int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
{
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct extent_map_tree *tree = &inode->extent_tree;
        int ret = 0;
        struct extent_map *em;

        write_lock(&tree->lock);
        em = lookup_extent_mapping(tree, start, len);

        if (WARN_ON(!em)) {
                btrfs_warn(fs_info,
"no extent map found for inode %llu (root %lld) when unpinning extent range [%llu, %llu), generation %llu",
                           btrfs_ino(inode), btrfs_root_id(inode->root),
                           start, start + len, gen);
                ret = -ENOENT;
                goto out;
        }

        if (WARN_ON(em->start != start)) {
                btrfs_warn(fs_info,
"found extent map for inode %llu (root %lld) with unexpected start offset %llu when unpinning extent range [%llu, %llu), generation %llu",
                           btrfs_ino(inode), btrfs_root_id(inode->root),
                           em->start, start, start + len, gen);
                ret = -EUCLEAN;
                goto out;
        }

        em->generation = gen;
        em->flags &= ~EXTENT_FLAG_PINNED;

        try_merge_map(inode, em);

out:
        write_unlock(&tree->lock);
        free_extent_map(em);
        return ret;
}

void clear_em_logging(struct btrfs_inode *inode, struct extent_map *em)
{
        lockdep_assert_held_write(&inode->extent_tree.lock);

        em->flags &= ~EXTENT_FLAG_LOGGING;
        if (extent_map_in_tree(em))
                try_merge_map(inode, em);
}

static inline void setup_extent_mapping(struct btrfs_inode *inode,
                                        struct extent_map *em,
                                        int modified)
{
        refcount_inc(&em->refs);

        ASSERT(list_empty(&em->list));

        if (modified)
                list_add(&em->list, &inode->extent_tree.modified_extents);
        else
                try_merge_map(inode, em);
}

/*
 * Add a new extent map to an inode's extent map tree.
 *
 * @inode:      the target inode
 * @em:         map to insert
 * @modified:   indicate whether the given @em should be added to the
 *              modified list, which indicates the extent needs to be logged
 *
 * Insert @em into the @inode's extent map tree or perform a simple
 * forward/backward merge with existing mappings.  The extent_map struct passed
 * in will be inserted into the tree directly, with an additional reference
 * taken, or a reference dropped if the merge attempt was successful.
 */
static int add_extent_mapping(struct btrfs_inode *inode,
                              struct extent_map *em, int modified)
{
        struct extent_map_tree *tree = &inode->extent_tree;
        struct btrfs_root *root = inode->root;
        struct btrfs_fs_info *fs_info = root->fs_info;
        int ret;

        lockdep_assert_held_write(&tree->lock);

        validate_extent_map(fs_info, em);
        ret = tree_insert(&tree->root, em);
        if (ret)
                return ret;

        setup_extent_mapping(inode, em, modified);

        if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(root)))
                percpu_counter_inc(&fs_info->evictable_extent_maps);

        return 0;
}

static struct extent_map *
__lookup_extent_mapping(struct extent_map_tree *tree,
                        u64 start, u64 len, int strict)
{
        struct extent_map *em;
        struct rb_node *rb_node;
        struct rb_node *prev_or_next = NULL;
        u64 end = range_end(start, len);

        rb_node = __tree_search(&tree->root, start, &prev_or_next);
        if (!rb_node) {
                if (prev_or_next)
                        rb_node = prev_or_next;
                else
                        return NULL;
        }

        em = rb_entry(rb_node, struct extent_map, rb_node);

        if (strict && !(end > em->start && start < extent_map_end(em)))
                return NULL;

        refcount_inc(&em->refs);
        return em;
}

/*
 * Lookup an extent_map that intersects the [start, start + len) range.
 *
 * @tree:       tree to lookup in
 * @start:      byte offset to start the search
 * @len:        length of the lookup range
 *
 * Find and return the first extent_map struct in @tree that intersects the
 * [start, start + len) range.  There may be additional objects in the tree
 * that intersect, so check the object returned carefully to make sure that no
 * additional lookups are needed.
 */
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
                                         u64 start, u64 len)
{
        return __lookup_extent_mapping(tree, start, len, 1);
}

/*
 * Find a nearby extent map intersecting the [start, start + len) range (not
 * an exact search).
 *
 * @tree:       tree to lookup in
 * @start:      byte offset to start the search
 * @len:        length of the lookup range
 *
 * Find and return the first extent_map struct in @tree that intersects the
 * [start, start + len) range.
 *
 * If one can't be found, any nearby extent may be returned.
 */
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
                                         u64 start, u64 len)
{
        return __lookup_extent_mapping(tree, start, len, 0);
}
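
/*
 * Editor's note: an illustrative contrast of the two lookups above
 * (hypothetical caller, kept out of the build). Assume the tree contains a
 * single map for [8K, 12K):
 */
#if 0
static void lookup_vs_search_example(struct extent_map_tree *tree)
{
        /* Strict: [0, 4K) does not intersect [8K, 12K), so this is NULL. */
        struct extent_map *em1 = lookup_extent_mapping(tree, 0, 4096);

        /* Non-strict: returns the nearby [8K, 12K) map instead of NULL. */
        struct extent_map *em2 = search_extent_mapping(tree, 0, 4096);

        free_extent_map(em1);   /* free_extent_map(NULL) is a no-op. */
        free_extent_map(em2);
}
#endif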

/*
 * Remove an extent_map from its inode's extent tree.
 *
 * @inode:      the inode the extent map belongs to
 * @em:         extent map being removed
 *
 * Remove @em from the extent tree of @inode.  No reference counts are dropped,
 * and no checks are done to see if the range is in use.
 */
void remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *em)
{
        struct extent_map_tree *tree = &inode->extent_tree;

        lockdep_assert_held_write(&tree->lock);

        WARN_ON(em->flags & EXTENT_FLAG_PINNED);
        rb_erase(&em->rb_node, &tree->root);
        if (!(em->flags & EXTENT_FLAG_LOGGING))
                list_del_init(&em->list);
        RB_CLEAR_NODE(&em->rb_node);

        dec_evictable_extent_maps(inode);
}

static void replace_extent_mapping(struct btrfs_inode *inode,
                                   struct extent_map *cur,
                                   struct extent_map *new,
                                   int modified)
{
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct extent_map_tree *tree = &inode->extent_tree;

        lockdep_assert_held_write(&tree->lock);

        validate_extent_map(fs_info, new);

        WARN_ON(cur->flags & EXTENT_FLAG_PINNED);
        ASSERT(extent_map_in_tree(cur));
        if (!(cur->flags & EXTENT_FLAG_LOGGING))
                list_del_init(&cur->list);
        rb_replace_node(&cur->rb_node, &new->rb_node, &tree->root);
        RB_CLEAR_NODE(&cur->rb_node);

        setup_extent_mapping(inode, new, modified);
}

static struct extent_map *next_extent_map(const struct extent_map *em)
{
        struct rb_node *next;

        next = rb_next(&em->rb_node);
        if (!next)
                return NULL;
        return container_of(next, struct extent_map, rb_node);
}

static struct extent_map *prev_extent_map(struct extent_map *em)
{
        struct rb_node *prev;

        prev = rb_prev(&em->rb_node);
        if (!prev)
                return NULL;
        return container_of(prev, struct extent_map, rb_node);
}

/*
 * Helper for btrfs_get_extent.  Given an existing extent in the tree (the
 * nearest extent to @map_start) and an extent that we want to insert, deal
 * with overlap and insert the best-fitting new extent into the tree.
 */
static noinline int merge_extent_mapping(struct btrfs_inode *inode,
                                         struct extent_map *existing,
                                         struct extent_map *em,
                                         u64 map_start)
{
        struct extent_map *prev;
        struct extent_map *next;
        u64 start;
        u64 end;
        u64 start_diff;

        if (map_start < em->start || map_start >= extent_map_end(em))
                return -EINVAL;

        if (existing->start > map_start) {
                next = existing;
                prev = prev_extent_map(next);
        } else {
                prev = existing;
                next = next_extent_map(prev);
        }

        start = prev ? extent_map_end(prev) : em->start;
        start = max_t(u64, start, em->start);
        end = next ? next->start : extent_map_end(em);
        end = min_t(u64, end, extent_map_end(em));
        start_diff = start - em->start;
        em->start = start;
        em->len = end - start;
        if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
                em->offset += start_diff;
        return add_extent_mapping(inode, em, 0);
}
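
/*
 * Editor's note: a worked example of the clamping above. Say em spans
 * [0, 16K), the existing map is [4K, 8K), map_start == 9K and there is no
 * next map. Then prev == existing, start = max(extent_map_end(prev),
 * em->start) = 8K and end = extent_map_end(em) = 16K, so em is trimmed to
 * [8K, 16K), with start_diff == 8K added to em->offset for a regular extent
 * before the trimmed map is inserted.
 */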

/*
 * Add an extent mapping to an inode's extent map tree.
 *
 * @inode:    target inode
 * @em_in:    extent we are inserting
 * @start:    start of the logical range btrfs_get_extent() is requesting
 * @len:      length of the logical range btrfs_get_extent() is requesting
 *
 * Note that @em_in's range may be different from [start, start + len),
 * but they must overlap.
 *
 * Insert @em_in into the inode's extent map tree. In case there is an
 * overlapping range, handle the -EEXIST by either:
 * a) Returning the existing extent in @em_in if @start is within the
 *    existing em.
 * b) Merging the existing extent with @em_in passed in.
 *
 * Return 0 on success, otherwise -EEXIST.
 */
int btrfs_add_extent_mapping(struct btrfs_inode *inode,
                             struct extent_map **em_in, u64 start, u64 len)
{
        int ret;
        struct extent_map *em = *em_in;
        struct btrfs_fs_info *fs_info = inode->root->fs_info;

        /*
         * Tree-checker should have rejected any inline extent with non-zero
         * file offset. Here just do a sanity check.
         */
        if (em->disk_bytenr == EXTENT_MAP_INLINE)
                ASSERT(em->start == 0);

        ret = add_extent_mapping(inode, em, 0);
        /*
         * It is possible that someone inserted the extent into the tree
         * while we had the lock dropped.  It is also possible that
         * an overlapping map exists in the tree.
         */
        if (ret == -EEXIST) {
                struct extent_map *existing;

                existing = search_extent_mapping(&inode->extent_tree, start, len);

                trace_btrfs_handle_em_exist(fs_info, existing, em, start, len);

                /*
                 * existing will always be non-NULL, since there must be an
                 * extent causing the -EEXIST.
                 */
                if (start >= existing->start &&
                    start < extent_map_end(existing)) {
                        free_extent_map(em);
                        *em_in = existing;
                        ret = 0;
                } else {
                        u64 orig_start = em->start;
                        u64 orig_len = em->len;

                        /*
                         * The existing extent map is the one nearest to
                         * the [start, start + len) range which overlaps.
                         */
                        ret = merge_extent_mapping(inode, existing, em, start);
                        if (WARN_ON(ret)) {
                                free_extent_map(em);
                                *em_in = NULL;
                                btrfs_warn(fs_info,
"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu",
                                           existing->start, extent_map_end(existing),
                                           orig_start, orig_start + orig_len, start);
                        }
                        free_extent_map(existing);
                }
        }

        ASSERT(ret == 0 || ret == -EEXIST);
        return ret;
}
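
/*
 * Editor's note: the two -EEXIST outcomes above, made concrete. If the
 * caller asked for start == 10K and the conflicting map in the tree is
 * [8K, 16K), case a) applies: @em_in is swapped for the existing map and 0
 * is returned. If instead the nearest map is [0, 8K), so that start lies
 * outside it, case b) applies: merge_extent_mapping() trims @em_in so it
 * begins at the end of the existing map before inserting it.
 */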

/*
 * Drop all extent maps from a tree in the fastest possible way, rescheduling
 * if needed. This avoids searching the tree, from the root down to the first
 * extent map, before each deletion.
 */
static void drop_all_extent_maps_fast(struct btrfs_inode *inode)
{
        struct extent_map_tree *tree = &inode->extent_tree;
        struct rb_node *node;

        write_lock(&tree->lock);
        node = rb_first(&tree->root);
        while (node) {
                struct extent_map *em;
                struct rb_node *next = rb_next(node);

                em = rb_entry(node, struct extent_map, rb_node);
                em->flags &= ~(EXTENT_FLAG_PINNED | EXTENT_FLAG_LOGGING);
                remove_extent_mapping(inode, em);
                free_extent_map(em);

                if (cond_resched_rwlock_write(&tree->lock))
                        node = rb_first(&tree->root);
                else
                        node = next;
        }
        write_unlock(&tree->lock);
}

/*
 * Drop all extent maps in a given range.
 *
 * @inode:       The target inode.
 * @start:       Start offset of the range.
 * @end:         End offset of the range (inclusive value).
 * @skip_pinned: Indicate if pinned extent maps should be ignored or not.
 *
 * This drops all the extent maps that intersect the given range [@start, @end].
 * Extent maps that partially overlap the range and extend before or beyond it
 * are split.
 * The caller should have locked an appropriate file range in the inode's io
 * tree before calling this function.
 */
void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
                                 bool skip_pinned)
{
        struct extent_map *split;
        struct extent_map *split2;
        struct extent_map *em;
        struct extent_map_tree *em_tree = &inode->extent_tree;
        u64 len = end - start + 1;

        WARN_ON(end < start);
        if (end == (u64)-1) {
                if (start == 0 && !skip_pinned) {
                        drop_all_extent_maps_fast(inode);
                        return;
                }
                len = (u64)-1;
        } else {
                /* Make end offset exclusive for use in the loop below. */
                end++;
        }

        /*
         * It's ok if we fail to allocate the extent maps, see the comment near
         * the bottom of the loop below. We only need two spare extent maps in
         * the worst case, where the first extent map that intersects our range
         * starts before the range and the last extent map that intersects our
         * range ends after our range (and they might be the same extent map),
         * because we need to split those two extent maps at the boundaries.
         */
        split = alloc_extent_map();
        split2 = alloc_extent_map();

        write_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);

        while (em) {
                /* extent_map_end() returns an exclusive value (last byte + 1). */
                const u64 em_end = extent_map_end(em);
                struct extent_map *next_em = NULL;
                u64 gen;
                unsigned long flags;
                bool modified;

                if (em_end < end) {
                        next_em = next_extent_map(em);
                        if (next_em) {
                                if (next_em->start < end)
                                        refcount_inc(&next_em->refs);
                                else
                                        next_em = NULL;
                        }
                }

                if (skip_pinned && (em->flags & EXTENT_FLAG_PINNED)) {
                        start = em_end;
                        goto next;
                }

                flags = em->flags;
                /*
                 * In case we split the extent map, we want to preserve the
                 * EXTENT_FLAG_LOGGING flag on our extent map, but we don't want
                 * it on the new extent maps.
                 */
                em->flags &= ~(EXTENT_FLAG_PINNED | EXTENT_FLAG_LOGGING);
                modified = !list_empty(&em->list);

                /*
                 * The extent map does not cross our target range, so no need to
                 * split it, we can remove it directly.
                 */
                if (em->start >= start && em_end <= end)
                        goto remove_em;

                gen = em->generation;

                if (em->start < start) {
                        if (!split) {
                                split = split2;
                                split2 = NULL;
                                if (!split)
                                        goto remove_em;
                        }
                        split->start = em->start;
                        split->len = start - em->start;

                        if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
                                split->disk_bytenr = em->disk_bytenr;
                                split->disk_num_bytes = em->disk_num_bytes;
                                split->offset = em->offset;
                                split->ram_bytes = em->ram_bytes;
                        } else {
                                split->disk_bytenr = em->disk_bytenr;
                                split->disk_num_bytes = 0;
                                split->offset = 0;
                                split->ram_bytes = split->len;
                        }

                        split->generation = gen;
                        split->flags = flags;
                        replace_extent_mapping(inode, em, split, modified);
                        free_extent_map(split);
                        split = split2;
                        split2 = NULL;
                }
                if (em_end > end) {
                        if (!split) {
                                split = split2;
                                split2 = NULL;
                                if (!split)
                                        goto remove_em;
                        }
                        split->start = end;
                        split->len = em_end - end;
                        split->disk_bytenr = em->disk_bytenr;
                        split->flags = flags;
                        split->generation = gen;

                        if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
                                split->disk_num_bytes = em->disk_num_bytes;
                                split->offset = em->offset + end - em->start;
                                split->ram_bytes = em->ram_bytes;
                        } else {
                                split->disk_num_bytes = 0;
                                split->offset = 0;
                                split->ram_bytes = split->len;
                        }

                        if (extent_map_in_tree(em)) {
                                replace_extent_mapping(inode, em, split, modified);
                        } else {
                                int ret;

                                ret = add_extent_mapping(inode, split, modified);
                                /* Logic error, shouldn't happen. */
                                ASSERT(ret == 0);
                                if (WARN_ON(ret != 0) && modified)
                                        btrfs_set_inode_full_sync(inode);
                        }
                        free_extent_map(split);
                        split = NULL;
                }
remove_em:
                if (extent_map_in_tree(em)) {
                        /*
                         * If the extent map is still in the tree it means that
                         * either of the following is true:
                         *
                         * 1) It fits entirely in our range (doesn't end beyond
                         *    it or starts before it);
                         *
                         * 2) It starts before our range and/or ends after our
                         *    range, and we were not able to allocate the extent
                         *    maps for the split operations, @split and @split2.
                         *
                         * If we are at case 2) then we just remove the entire
                         * extent map - this is fine since if anyone needs to
                         * access the subranges outside our range, they will
                         * just load it again from the subvolume tree's file
                         * extent item. However if the extent map was in the
                         * list of modified extents, then we must mark the
                         * inode for a full fsync, otherwise a fast fsync will
                         * miss this extent if it's new and needs to be logged.
                         */
                        if ((em->start < start || em_end > end) && modified) {
                                ASSERT(!split);
                                btrfs_set_inode_full_sync(inode);
                        }
                        remove_extent_mapping(inode, em);
                }

                /*
                 * Once for the tree reference (we replaced or removed the
                 * extent map from the tree).
                 */
                free_extent_map(em);
next:
                /* Once for us (for our lookup reference). */
                free_extent_map(em);

                em = next_em;
        }

        write_unlock(&em_tree->lock);

        free_extent_map(split);
        free_extent_map(split2);
}
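
/*
 * Editor's note: a worked example of the splitting above. Dropping the range
 * [8K, 16K - 1] from an inode whose tree holds a single regular map
 * [4K, 20K) takes both split paths: the map starts before the range, so
 * @split becomes [4K, 8K) and replaces the original in the tree, and it ends
 * beyond the (exclusive) end of 16K, so @split2 becomes [16K, 20K) with its
 * offset advanced by 12K (end - em->start). The middle [8K, 16K) subrange is
 * no longer mapped afterwards.
 */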

/*
 * Replace a range in the inode's extent map tree with a new extent map.
 *
 * @inode:      The target inode.
 * @new_em:     The new extent map to add to the inode's extent map tree.
 * @modified:   Indicate if the new extent map should be added to the list of
 *              modified extents (for fast fsync tracking).
 *
 * Drops all the extent maps in the inode's extent map tree that intersect the
 * range of the new extent map and adds the new extent map to the tree.
 * The caller should have locked an appropriate file range in the inode's io
 * tree before calling this function.
 */
int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
                                   struct extent_map *new_em,
                                   bool modified)
{
        const u64 end = new_em->start + new_em->len - 1;
        struct extent_map_tree *tree = &inode->extent_tree;
        int ret;

        ASSERT(!extent_map_in_tree(new_em));

        /*
         * The caller has locked an appropriate file range in the inode's io
         * tree, but getting -EEXIST when adding the new extent map can still
         * happen in case there are extents that partially cover the range, and
         * this is due to two tasks operating on different parts of the extent.
         * See commit 18e83ac75bfe67 ("Btrfs: fix unexpected EEXIST from
         * btrfs_get_extent") for an example and details.
         */
        do {
                btrfs_drop_extent_map_range(inode, new_em->start, end, false);
                write_lock(&tree->lock);
                ret = add_extent_mapping(inode, new_em, modified);
                write_unlock(&tree->lock);
        } while (ret == -EEXIST);

        return ret;
}

/*
 * Split off the first @pre bytes from the extent_map at [start, start + len),
 * and set the block start of that first part to @new_logical.
 *
 * This function is used when an ordered_extent needs to be split.
 */
int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
                     u64 new_logical)
{
        struct extent_map_tree *em_tree = &inode->extent_tree;
        struct extent_map *em;
        struct extent_map *split_pre = NULL;
        struct extent_map *split_mid = NULL;
        int ret = 0;
        unsigned long flags;

        ASSERT(pre != 0);
        ASSERT(pre < len);

        split_pre = alloc_extent_map();
        if (!split_pre)
                return -ENOMEM;
        split_mid = alloc_extent_map();
        if (!split_mid) {
                ret = -ENOMEM;
                goto out_free_pre;
        }

        lock_extent(&inode->io_tree, start, start + len - 1, NULL);
        write_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        if (!em) {
                ret = -EIO;
                goto out_unlock;
        }

        ASSERT(em->len == len);
        ASSERT(!extent_map_is_compressed(em));
        ASSERT(em->disk_bytenr < EXTENT_MAP_LAST_BYTE);
        ASSERT(em->flags & EXTENT_FLAG_PINNED);
        ASSERT(!(em->flags & EXTENT_FLAG_LOGGING));
        ASSERT(!list_empty(&em->list));

        flags = em->flags;
        em->flags &= ~EXTENT_FLAG_PINNED;

        /* First, replace the em with a new extent_map starting from em->start. */
        split_pre->start = em->start;
        split_pre->len = pre;
        split_pre->disk_bytenr = new_logical;
        split_pre->disk_num_bytes = split_pre->len;
        split_pre->offset = 0;
        split_pre->ram_bytes = split_pre->len;
        split_pre->flags = flags;
        split_pre->generation = em->generation;

        replace_extent_mapping(inode, em, split_pre, 1);

        /*
         * Now we only have an extent_map at:
         *     [em->start, em->start + pre)
         */

        /* Insert the middle extent_map. */
        split_mid->start = em->start + pre;
        split_mid->len = em->len - pre;
        split_mid->disk_bytenr = extent_map_block_start(em) + pre;
        split_mid->disk_num_bytes = split_mid->len;
        split_mid->offset = 0;
        split_mid->ram_bytes = split_mid->len;
        split_mid->flags = flags;
        split_mid->generation = em->generation;
        add_extent_mapping(inode, split_mid, 1);

        /* Once for us */
        free_extent_map(em);
        /* Once for the tree */
        free_extent_map(em);

out_unlock:
        write_unlock(&em_tree->lock);
        unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
        free_extent_map(split_mid);
out_free_pre:
        free_extent_map(split_pre);
        return ret;
}
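
/*
 * Editor's note: the resulting layout of the split above. For start == 0,
 * len == 16K, pre == 4K and new_logical == N, the original pinned map
 * [0, 16K) is replaced by two maps: [0, 4K) pointing at disk bytenr N and
 * [4K, 16K) pointing at the original block start plus 4K. Both are inserted
 * with modified == 1, so they end up on the modified list for fsync
 * tracking.
 */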

struct btrfs_em_shrink_ctx {
        long nr_to_scan;
        long scanned;
        u64 last_ino;
        u64 last_root;
};

static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_ctx *ctx)
{
        const u64 cur_fs_gen = btrfs_get_fs_generation(inode->root->fs_info);
        struct extent_map_tree *tree = &inode->extent_tree;
        long nr_dropped = 0;
        struct rb_node *node;

        /*
         * Take the mmap lock so that we serialize with the inode logging phase
         * of fsync because we may need to set the full sync flag on the inode,
         * in case we have to remove extent maps in the tree's list of modified
         * extents. If we set the full sync flag in the inode while an fsync is
         * in progress, we may risk missing new extents because before the flag
         * is set, fsync decides to only wait for writeback to complete and then
         * during inode logging it sees the flag set and uses the subvolume tree
         * to find new extents, which may not be there yet because ordered
         * extents haven't completed yet.
         *
         * We also do a try lock because otherwise we could deadlock. This is
         * because the shrinker for this filesystem may be invoked while we are
         * in a path that is holding the mmap lock in write mode. For example in
         * a reflink operation while COWing an extent buffer, when allocating
         * pages for a new extent buffer and under memory pressure, the shrinker
         * may be invoked, and therefore we would deadlock by attempting to read
         * lock the mmap lock while we already hold a write lock on it.
         */
        if (!down_read_trylock(&inode->i_mmap_lock))
                return 0;

        /*
         * We want to be fast so if the lock is busy we don't want to spend time
         * waiting for it - either some task is about to do IO for the inode or
         * we may have another task shrinking extent maps, here in this code, so
         * skip this inode.
         */
        if (!write_trylock(&tree->lock)) {
                up_read(&inode->i_mmap_lock);
                return 0;
        }

        node = rb_first(&tree->root);
        while (node) {
                struct rb_node *next = rb_next(node);
                struct extent_map *em;

                em = rb_entry(node, struct extent_map, rb_node);
                ctx->scanned++;

                if (em->flags & EXTENT_FLAG_PINNED)
                        goto next;

                /*
                 * If the extent map is in the list of modified extents (new)
                 * and its generation is the same as (or greater than) the
                 * current fs generation, it means it was not yet persisted so
                 * we have to set the full sync flag so that the next fsync
                 * will not miss it.
                 */
                if (!list_empty(&em->list) && em->generation >= cur_fs_gen)
                        btrfs_set_inode_full_sync(inode);

                remove_extent_mapping(inode, em);
                trace_btrfs_extent_map_shrinker_remove_em(inode, em);
                /* Drop the reference for the tree. */
                free_extent_map(em);
                nr_dropped++;
next:
                if (ctx->scanned >= ctx->nr_to_scan)
                        break;

                /*
                 * Stop if we need to reschedule or there's contention on the
                 * lock. This is to avoid slowing other tasks trying to take the
                 * lock.
                 */
                if (need_resched() || rwlock_needbreak(&tree->lock))
                        break;
                node = next;
        }
        write_unlock(&tree->lock);
        up_read(&inode->i_mmap_lock);

        return nr_dropped;
}

static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx *ctx)
{
        struct btrfs_inode *inode;
        long nr_dropped = 0;
        u64 min_ino = ctx->last_ino + 1;

        inode = btrfs_find_first_inode(root, min_ino);
        while (inode) {
                nr_dropped += btrfs_scan_inode(inode, ctx);

                min_ino = btrfs_ino(inode) + 1;
                ctx->last_ino = btrfs_ino(inode);
                btrfs_add_delayed_iput(inode);

                if (ctx->scanned >= ctx->nr_to_scan)
                        break;

                cond_resched();

                inode = btrfs_find_first_inode(root, min_ino);
        }

        if (inode) {
                /*
                 * There are still inodes in this root or we happened to process
                 * the last one and reached the scan limit. In either case set
                 * the current root to this one, so we'll resume from the next
                 * inode if there is one or we will find out this was the last
                 * one and move to the next root.
                 */
                ctx->last_root = btrfs_root_id(root);
        } else {
                /*
                 * No more inodes in this root, set ctx->last_ino to 0 so that
                 * when processing the next root we start from its first inode.
                 */
                ctx->last_ino = 0;
                ctx->last_root = btrfs_root_id(root) + 1;
        }

        return nr_dropped;
}

long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
{
        struct btrfs_em_shrink_ctx ctx;
        u64 start_root_id;
        u64 next_root_id;
        bool cycled = false;
        long nr_dropped = 0;

        ctx.scanned = 0;
        ctx.nr_to_scan = nr_to_scan;

        /*
         * In case we have multiple tasks running this shrinker, make the next
         * one start from the next inode in case it starts before we finish.
         */
        spin_lock(&fs_info->extent_map_shrinker_lock);
        ctx.last_ino = fs_info->extent_map_shrinker_last_ino;
        fs_info->extent_map_shrinker_last_ino++;
        ctx.last_root = fs_info->extent_map_shrinker_last_root;
        spin_unlock(&fs_info->extent_map_shrinker_lock);

        start_root_id = ctx.last_root;
        next_root_id = ctx.last_root;

        if (trace_btrfs_extent_map_shrinker_scan_enter_enabled()) {
                s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);

                trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan,
                                                           nr, ctx.last_root,
                                                           ctx.last_ino);
        }

        while (ctx.scanned < ctx.nr_to_scan) {
                struct btrfs_root *root;
                unsigned long count;

                cond_resched();

                spin_lock(&fs_info->fs_roots_radix_lock);
                count = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
                                               (void **)&root,
                                               (unsigned long)next_root_id, 1);
                if (count == 0) {
                        spin_unlock(&fs_info->fs_roots_radix_lock);
                        if (start_root_id > 0 && !cycled) {
                                next_root_id = 0;
                                ctx.last_root = 0;
                                ctx.last_ino = 0;
                                cycled = true;
                                continue;
                        }
                        break;
                }
                next_root_id = btrfs_root_id(root) + 1;
                root = btrfs_grab_root(root);
                spin_unlock(&fs_info->fs_roots_radix_lock);

                if (!root)
                        continue;

                if (is_fstree(btrfs_root_id(root)))
                        nr_dropped += btrfs_scan_root(root, &ctx);

                btrfs_put_root(root);
        }

        /*
         * In case of multiple tasks running this extent map shrinking code this
         * isn't perfect but it's simple and silences things like KCSAN. It's
         * not possible to know which task made more progress because we can
         * cycle back to the first root and first inode if it's not the first
         * time the shrinker ran, see the above logic. Also a task that started
         * later may finish earlier than another task while having made less
         * progress. So make this simple and update to the progress of the last
         * task that finished, with the occasional possibility of having two
         * consecutive runs of the shrinker process the same inodes.
         */
        spin_lock(&fs_info->extent_map_shrinker_lock);
        fs_info->extent_map_shrinker_last_ino = ctx.last_ino;
        fs_info->extent_map_shrinker_last_root = ctx.last_root;
        spin_unlock(&fs_info->extent_map_shrinker_lock);

        if (trace_btrfs_extent_map_shrinker_scan_exit_enabled()) {
                s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);

                trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped,
                                                          nr, ctx.last_root,
                                                          ctx.last_ino);
        }

        return nr_dropped;
}
