~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/ubifs/shrinker.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * This file is part of UBIFS.
  4  *
  5  * Copyright (C) 2006-2008 Nokia Corporation.
  6  *
  7  * Authors: Artem Bityutskiy (Битюцкий Артём)
  8  *          Adrian Hunter
  9  */
 10 
 11 /*
 12  * This file implements UBIFS shrinker which evicts clean znodes from the TNC
 13  * tree when Linux VM needs more RAM.
 14  *
 15  * We do not implement any LRU lists to find oldest znodes to free because it
 16  * would add additional overhead to the file system fast paths. So the shrinker
 17  * just walks the TNC tree when searching for znodes to free.
 18  *
 19  * If the root of a TNC sub-tree is clean and old enough, then the children are
 20  * also clean and old enough. So the shrinker walks the TNC in level order and
 21  * dumps entire sub-trees.
 22  *
 23  * The age of znodes is just the time-stamp when they were last looked at.
 24  * The current shrinker first tries to evict old znodes, then young ones.
 25  *
 26  * Since the shrinker is global, it has to protect against races with FS
 27  * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
 28  */
 29 
 30 #include "ubifs.h"
 31 
 32 /* List of all UBIFS file-system instances */
 33 LIST_HEAD(ubifs_infos);
 34 
 35 /*
 36  * We number each shrinker run and record the number on the ubifs_info structure
 37  * so that we can easily work out which ubifs_info structures have already been
 38  * done by the current run.
 39  */
 40 static unsigned int shrinker_run_no;
 41 
 42 /* Protects 'ubifs_infos' list */
 43 DEFINE_SPINLOCK(ubifs_infos_lock);
 44 
 45 /* Global clean znode counter (for all mounted UBIFS instances) */
 46 atomic_long_t ubifs_clean_zn_cnt;
 47 
 48 /**
 49  * shrink_tnc - shrink TNC tree.
 50  * @c: UBIFS file-system description object
 51  * @nr: number of znodes to free
 52  * @age: the age of znodes to free
 53  * @contention: if any contention, this is set to %1
 54  *
 55  * This function traverses TNC tree and frees clean znodes. It does not free
 56  * clean znodes which younger then @age. Returns number of freed znodes.
 57  */
 58 static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
 59 {
 60         int total_freed = 0;
 61         struct ubifs_znode *znode, *zprev;
 62         time64_t time = ktime_get_seconds();
 63 
 64         ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
 65         ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
 66 
 67         if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
 68                 return 0;
 69 
 70         /*
 71          * Traverse the TNC tree in levelorder manner, so that it is possible
 72          * to destroy large sub-trees. Indeed, if a znode is old, then all its
 73          * children are older or of the same age.
 74          *
 75          * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
 76          * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
 77          * changed only when the 'c->tnc_mutex' is held.
 78          */
 79         zprev = NULL;
 80         znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
 81         while (znode && total_freed < nr &&
 82                atomic_long_read(&c->clean_zn_cnt) > 0) {
 83                 int freed;
 84 
 85                 /*
 86                  * If the znode is clean, but it is in the 'c->cnext' list, this
 87                  * means that this znode has just been written to flash as a
 88                  * part of commit and was marked clean. They will be removed
 89                  * from the list at end commit. We cannot change the list,
 90                  * because it is not protected by any mutex (design decision to
 91                  * make commit really independent and parallel to main I/O). So
 92                  * we just skip these znodes.
 93                  *
 94                  * Note, the 'clean_zn_cnt' counters are not updated until
 95                  * after the commit, so the UBIFS shrinker does not report
 96                  * the znodes which are in the 'c->cnext' list as freeable.
 97                  *
 98                  * Also note, if the root of a sub-tree is not in 'c->cnext',
 99                  * then the whole sub-tree is not in 'c->cnext' as well, so it
100                  * is safe to dump whole sub-tree.
101                  */
102 
103                 if (znode->cnext) {
104                         /*
105                          * Very soon these znodes will be removed from the list
106                          * and become freeable.
107                          */
108                         *contention = 1;
109                 } else if (!ubifs_zn_dirty(znode) &&
110                            abs(time - znode->time) >= age) {
111                         if (znode->parent)
112                                 znode->parent->zbranch[znode->iip].znode = NULL;
113                         else
114                                 c->zroot.znode = NULL;
115 
116                         freed = ubifs_destroy_tnc_subtree(c, znode);
117                         atomic_long_sub(freed, &ubifs_clean_zn_cnt);
118                         atomic_long_sub(freed, &c->clean_zn_cnt);
119                         total_freed += freed;
120                         znode = zprev;
121                 }
122 
123                 if (unlikely(!c->zroot.znode))
124                         break;
125 
126                 zprev = znode;
127                 znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
128                 cond_resched();
129         }
130 
131         return total_freed;
132 }
133 
134 /**
135  * shrink_tnc_trees - shrink UBIFS TNC trees.
136  * @nr: number of znodes to free
137  * @age: the age of znodes to free
138  * @contention: if any contention, this is set to %1
139  *
140  * This function walks the list of mounted UBIFS file-systems and frees clean
141  * znodes which are older than @age, until at least @nr znodes are freed.
142  * Returns the number of freed znodes.
143  */
144 static int shrink_tnc_trees(int nr, int age, int *contention)
145 {
146         struct ubifs_info *c;
147         struct list_head *p;
148         unsigned int run_no;
149         int freed = 0;
150 
151         spin_lock(&ubifs_infos_lock);
152         do {
153                 run_no = ++shrinker_run_no;
154         } while (run_no == 0);
155         /* Iterate over all mounted UBIFS file-systems and try to shrink them */
156         p = ubifs_infos.next;
157         while (p != &ubifs_infos) {
158                 c = list_entry(p, struct ubifs_info, infos_list);
159                 /*
160                  * We move the ones we do to the end of the list, so we stop
161                  * when we see one we have already done.
162                  */
163                 if (c->shrinker_run_no == run_no)
164                         break;
165                 if (!mutex_trylock(&c->umount_mutex)) {
166                         /* Some un-mount is in progress, try next FS */
167                         *contention = 1;
168                         p = p->next;
169                         continue;
170                 }
171                 /*
172                  * We're holding 'c->umount_mutex', so the file-system won't go
173                  * away.
174                  */
175                 if (!mutex_trylock(&c->tnc_mutex)) {
176                         mutex_unlock(&c->umount_mutex);
177                         *contention = 1;
178                         p = p->next;
179                         continue;
180                 }
181                 spin_unlock(&ubifs_infos_lock);
182                 /*
183                  * OK, now we have TNC locked, the file-system cannot go away -
184                  * it is safe to reap the cache.
185                  */
186                 c->shrinker_run_no = run_no;
187                 freed += shrink_tnc(c, nr, age, contention);
188                 mutex_unlock(&c->tnc_mutex);
189                 spin_lock(&ubifs_infos_lock);
190                 /* Get the next list element before we move this one */
191                 p = p->next;
192                 /*
193                  * Move this one to the end of the list to provide some
194                  * fairness.
195                  */
196                 list_move_tail(&c->infos_list, &ubifs_infos);
197                 mutex_unlock(&c->umount_mutex);
198                 if (freed >= nr)
199                         break;
200         }
201         spin_unlock(&ubifs_infos_lock);
202         return freed;
203 }
204 
205 /**
206  * kick_a_thread - kick a background thread to start commit.
207  *
208  * This function kicks a background thread to start background commit. Returns
209  * %-1 if a thread was kicked or there is another reason to assume the memory
210  * will soon be freed or become freeable. If there are no dirty znodes, returns
211  * %0.
212  */
213 static int kick_a_thread(void)
214 {
215         int i;
216         struct ubifs_info *c;
217 
218         /*
219          * Iterate over all mounted UBIFS file-systems and find out if there is
220          * already an ongoing commit operation there. If no, then iterate for
221          * the second time and initiate background commit.
222          */
223         spin_lock(&ubifs_infos_lock);
224         for (i = 0; i < 2; i++) {
225                 list_for_each_entry(c, &ubifs_infos, infos_list) {
226                         long dirty_zn_cnt;
227 
228                         if (!mutex_trylock(&c->umount_mutex)) {
229                                 /*
230                                  * Some un-mount is in progress, it will
231                                  * certainly free memory, so just return.
232                                  */
233                                 spin_unlock(&ubifs_infos_lock);
234                                 return -1;
235                         }
236 
237                         dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
238 
239                         if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
240                             c->ro_mount || c->ro_error) {
241                                 mutex_unlock(&c->umount_mutex);
242                                 continue;
243                         }
244 
245                         if (c->cmt_state != COMMIT_RESTING) {
246                                 spin_unlock(&ubifs_infos_lock);
247                                 mutex_unlock(&c->umount_mutex);
248                                 return -1;
249                         }
250 
251                         if (i == 1) {
252                                 list_move_tail(&c->infos_list, &ubifs_infos);
253                                 spin_unlock(&ubifs_infos_lock);
254 
255                                 ubifs_request_bg_commit(c);
256                                 mutex_unlock(&c->umount_mutex);
257                                 return -1;
258                         }
259                         mutex_unlock(&c->umount_mutex);
260                 }
261         }
262         spin_unlock(&ubifs_infos_lock);
263 
264         return 0;
265 }
266 
267 unsigned long ubifs_shrink_count(struct shrinker *shrink,
268                                  struct shrink_control *sc)
269 {
270         long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
271 
272         /*
273          * Due to the way UBIFS updates the clean znode counter it may
274          * temporarily be negative.
275          */
276         return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
277 }
278 
279 unsigned long ubifs_shrink_scan(struct shrinker *shrink,
280                                 struct shrink_control *sc)
281 {
282         unsigned long nr = sc->nr_to_scan;
283         int contention = 0;
284         unsigned long freed;
285         long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
286 
287         if (!clean_zn_cnt) {
288                 /*
289                  * No clean znodes, nothing to reap. All we can do in this case
290                  * is to kick background threads to start commit, which will
291                  * probably make clean znodes which, in turn, will be freeable.
292                  * And we return -1 which means will make VM call us again
293                  * later.
294                  */
295                 dbg_tnc("no clean znodes, kick a thread");
296                 return kick_a_thread();
297         }
298 
299         freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
300         if (freed >= nr)
301                 goto out;
302 
303         dbg_tnc("not enough old znodes, try to free young ones");
304         freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
305         if (freed >= nr)
306                 goto out;
307 
308         dbg_tnc("not enough young znodes, free all");
309         freed += shrink_tnc_trees(nr - freed, 0, &contention);
310 
311         if (!freed && contention) {
312                 dbg_tnc("freed nothing, but contention");
313                 return SHRINK_STOP;
314         }
315 
316 out:
317         dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
318         return freed;
319 }
320 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php