~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/mm/shrinker.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 #include <linux/memcontrol.h>
  3 #include <linux/rwsem.h>
  4 #include <linux/shrinker.h>
  5 #include <linux/rculist.h>
  6 #include <trace/events/vmscan.h>
  7 
  8 #include "internal.h"
  9 
/* Every registered shrinker is linked here; shrink_slab() walks it under RCU. */
LIST_HEAD(shrinker_list);
/* Held while changing shrinker_list, the shrinker IDR and per-memcg shrinker_info. */
DEFINE_MUTEX(shrinker_mutex);
 12 
 13 #ifdef CONFIG_MEMCG
/*
 * Number of shrinker ids each memcg's shrinker_info is currently sized for;
 * updated by expand_shrinker_info() when an expansion succeeds.
 */
static int shrinker_nr_max;
 15 
 16 static inline int shrinker_unit_size(int nr_items)
 17 {
 18         return (DIV_ROUND_UP(nr_items, SHRINKER_UNIT_BITS) * sizeof(struct shrinker_info_unit *));
 19 }
 20 
 21 static inline void shrinker_unit_free(struct shrinker_info *info, int start)
 22 {
 23         struct shrinker_info_unit **unit;
 24         int nr, i;
 25 
 26         if (!info)
 27                 return;
 28 
 29         unit = info->unit;
 30         nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS);
 31 
 32         for (i = start; i < nr; i++) {
 33                 if (!unit[i])
 34                         break;
 35 
 36                 kfree(unit[i]);
 37                 unit[i] = NULL;
 38         }
 39 }
 40 
 41 static inline int shrinker_unit_alloc(struct shrinker_info *new,
 42                                        struct shrinker_info *old, int nid)
 43 {
 44         struct shrinker_info_unit *unit;
 45         int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS);
 46         int start = old ? DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0;
 47         int i;
 48 
 49         for (i = start; i < nr; i++) {
 50                 unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid);
 51                 if (!unit) {
 52                         shrinker_unit_free(new, start);
 53                         return -ENOMEM;
 54                 }
 55 
 56                 new->unit[i] = unit;
 57         }
 58 
 59         return 0;
 60 }
 61 
 62 void free_shrinker_info(struct mem_cgroup *memcg)
 63 {
 64         struct mem_cgroup_per_node *pn;
 65         struct shrinker_info *info;
 66         int nid;
 67 
 68         for_each_node(nid) {
 69                 pn = memcg->nodeinfo[nid];
 70                 info = rcu_dereference_protected(pn->shrinker_info, true);
 71                 shrinker_unit_free(info, 0);
 72                 kvfree(info);
 73                 rcu_assign_pointer(pn->shrinker_info, NULL);
 74         }
 75 }
 76 
 77 int alloc_shrinker_info(struct mem_cgroup *memcg)
 78 {
 79         struct shrinker_info *info;
 80         int nid, ret = 0;
 81         int array_size = 0;
 82 
 83         mutex_lock(&shrinker_mutex);
 84         array_size = shrinker_unit_size(shrinker_nr_max);
 85         for_each_node(nid) {
 86                 info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid);
 87                 if (!info)
 88                         goto err;
 89                 info->map_nr_max = shrinker_nr_max;
 90                 if (shrinker_unit_alloc(info, NULL, nid))
 91                         goto err;
 92                 rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
 93         }
 94         mutex_unlock(&shrinker_mutex);
 95 
 96         return ret;
 97 
 98 err:
 99         mutex_unlock(&shrinker_mutex);
100         free_shrinker_info(memcg);
101         return -ENOMEM;
102 }
103 
104 static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
105                                                      int nid)
106 {
107         return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
108                                          lockdep_is_held(&shrinker_mutex));
109 }
110 
111 static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size,
112                                     int old_size, int new_nr_max)
113 {
114         struct shrinker_info *new, *old;
115         struct mem_cgroup_per_node *pn;
116         int nid;
117 
118         for_each_node(nid) {
119                 pn = memcg->nodeinfo[nid];
120                 old = shrinker_info_protected(memcg, nid);
121                 /* Not yet online memcg */
122                 if (!old)
123                         return 0;
124 
125                 /* Already expanded this shrinker_info */
126                 if (new_nr_max <= old->map_nr_max)
127                         continue;
128 
129                 new = kvzalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid);
130                 if (!new)
131                         return -ENOMEM;
132 
133                 new->map_nr_max = new_nr_max;
134 
135                 memcpy(new->unit, old->unit, old_size);
136                 if (shrinker_unit_alloc(new, old, nid)) {
137                         kvfree(new);
138                         return -ENOMEM;
139                 }
140 
141                 rcu_assign_pointer(pn->shrinker_info, new);
142                 kvfree_rcu(old, rcu);
143         }
144 
145         return 0;
146 }
147 
148 static int expand_shrinker_info(int new_id)
149 {
150         int ret = 0;
151         int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS);
152         int new_size, old_size = 0;
153         struct mem_cgroup *memcg;
154 
155         if (!root_mem_cgroup)
156                 goto out;
157 
158         lockdep_assert_held(&shrinker_mutex);
159 
160         new_size = shrinker_unit_size(new_nr_max);
161         old_size = shrinker_unit_size(shrinker_nr_max);
162 
163         memcg = mem_cgroup_iter(NULL, NULL, NULL);
164         do {
165                 ret = expand_one_shrinker_info(memcg, new_size, old_size,
166                                                new_nr_max);
167                 if (ret) {
168                         mem_cgroup_iter_break(NULL, memcg);
169                         goto out;
170                 }
171         } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
172 out:
173         if (!ret)
174                 shrinker_nr_max = new_nr_max;
175 
176         return ret;
177 }
178 
179 static inline int shrinker_id_to_index(int shrinker_id)
180 {
181         return shrinker_id / SHRINKER_UNIT_BITS;
182 }
183 
184 static inline int shrinker_id_to_offset(int shrinker_id)
185 {
186         return shrinker_id % SHRINKER_UNIT_BITS;
187 }
188 
189 static inline int calc_shrinker_id(int index, int offset)
190 {
191         return index * SHRINKER_UNIT_BITS + offset;
192 }
193 
194 void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
195 {
196         if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
197                 struct shrinker_info *info;
198                 struct shrinker_info_unit *unit;
199 
200                 rcu_read_lock();
201                 info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
202                 unit = info->unit[shrinker_id_to_index(shrinker_id)];
203                 if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
204                         /* Pairs with smp mb in shrink_slab() */
205                         smp_mb__before_atomic();
206                         set_bit(shrinker_id_to_offset(shrinker_id), unit->map);
207                 }
208                 rcu_read_unlock();
209         }
210 }
211 
/* Maps the id of each memcg-aware shrinker to its struct shrinker. */
static DEFINE_IDR(shrinker_idr);
213 
214 static int shrinker_memcg_alloc(struct shrinker *shrinker)
215 {
216         int id, ret = -ENOMEM;
217 
218         if (mem_cgroup_disabled())
219                 return -ENOSYS;
220 
221         mutex_lock(&shrinker_mutex);
222         id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
223         if (id < 0)
224                 goto unlock;
225 
226         if (id >= shrinker_nr_max) {
227                 if (expand_shrinker_info(id)) {
228                         idr_remove(&shrinker_idr, id);
229                         goto unlock;
230                 }
231         }
232         shrinker->id = id;
233         ret = 0;
234 unlock:
235         mutex_unlock(&shrinker_mutex);
236         return ret;
237 }
238 
239 static void shrinker_memcg_remove(struct shrinker *shrinker)
240 {
241         int id = shrinker->id;
242 
243         BUG_ON(id < 0);
244 
245         lockdep_assert_held(&shrinker_mutex);
246 
247         idr_remove(&shrinker_idr, id);
248 }
249 
250 static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
251                                    struct mem_cgroup *memcg)
252 {
253         struct shrinker_info *info;
254         struct shrinker_info_unit *unit;
255         long nr_deferred;
256 
257         rcu_read_lock();
258         info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
259         unit = info->unit[shrinker_id_to_index(shrinker->id)];
260         nr_deferred = atomic_long_xchg(&unit->nr_deferred[shrinker_id_to_offset(shrinker->id)], 0);
261         rcu_read_unlock();
262 
263         return nr_deferred;
264 }
265 
266 static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
267                                   struct mem_cgroup *memcg)
268 {
269         struct shrinker_info *info;
270         struct shrinker_info_unit *unit;
271         long nr_deferred;
272 
273         rcu_read_lock();
274         info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
275         unit = info->unit[shrinker_id_to_index(shrinker->id)];
276         nr_deferred =
277                 atomic_long_add_return(nr, &unit->nr_deferred[shrinker_id_to_offset(shrinker->id)]);
278         rcu_read_unlock();
279 
280         return nr_deferred;
281 }
282 
283 void reparent_shrinker_deferred(struct mem_cgroup *memcg)
284 {
285         int nid, index, offset;
286         long nr;
287         struct mem_cgroup *parent;
288         struct shrinker_info *child_info, *parent_info;
289         struct shrinker_info_unit *child_unit, *parent_unit;
290 
291         parent = parent_mem_cgroup(memcg);
292         if (!parent)
293                 parent = root_mem_cgroup;
294 
295         /* Prevent from concurrent shrinker_info expand */
296         mutex_lock(&shrinker_mutex);
297         for_each_node(nid) {
298                 child_info = shrinker_info_protected(memcg, nid);
299                 parent_info = shrinker_info_protected(parent, nid);
300                 for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) {
301                         child_unit = child_info->unit[index];
302                         parent_unit = parent_info->unit[index];
303                         for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) {
304                                 nr = atomic_long_read(&child_unit->nr_deferred[offset]);
305                                 atomic_long_add(nr, &parent_unit->nr_deferred[offset]);
306                         }
307                 }
308         }
309         mutex_unlock(&shrinker_mutex);
310 }
311 #else
/*
 * !CONFIG_MEMCG stub: always returns -ENOSYS, which shrinker_alloc() treats
 * as "fall back to a non-memcg-aware shrinker".
 */
static int shrinker_memcg_alloc(struct shrinker *shrinker)
{
        return -ENOSYS;
}
316 
/* !CONFIG_MEMCG stub: there is no shrinker id to release. */
static void shrinker_memcg_remove(struct shrinker *shrinker)
{
}
320 
/* !CONFIG_MEMCG stub: no per-memcg deferred count exists, so report 0. */
static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
                                   struct mem_cgroup *memcg)
{
        return 0;
}
326 
/* !CONFIG_MEMCG stub: @nr is not recorded anywhere; always returns 0. */
static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
                                  struct mem_cgroup *memcg)
{
        return 0;
}
332 #endif /* CONFIG_MEMCG */
333 
334 static long xchg_nr_deferred(struct shrinker *shrinker,
335                              struct shrink_control *sc)
336 {
337         int nid = sc->nid;
338 
339         if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
340                 nid = 0;
341 
342         if (sc->memcg &&
343             (shrinker->flags & SHRINKER_MEMCG_AWARE))
344                 return xchg_nr_deferred_memcg(nid, shrinker,
345                                               sc->memcg);
346 
347         return atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
348 }
349 
350 
351 static long add_nr_deferred(long nr, struct shrinker *shrinker,
352                             struct shrink_control *sc)
353 {
354         int nid = sc->nid;
355 
356         if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
357                 nid = 0;
358 
359         if (sc->memcg &&
360             (shrinker->flags & SHRINKER_MEMCG_AWARE))
361                 return add_nr_deferred_memcg(nr, nid, shrinker,
362                                              sc->memcg);
363 
364         return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]);
365 }
366 
/* Batch size used by do_shrink_slab() when the shrinker leaves ->batch at 0. */
#define SHRINK_BATCH 128
368 
/*
 * Run one shrinker against the cache described by @shrinkctl.
 *
 * The scan target is the work deferred by earlier calls (scaled down by
 * @priority) plus a share of the currently freeable objects, capped at twice
 * the freeable count. Objects are scanned in batches through
 * ->scan_objects(); the part of the target that was not scanned is handed
 * back as deferred work for a later call.
 *
 * Returns the number of objects freed, or the ->count_objects() result
 * unchanged when that is 0 or SHRINK_EMPTY.
 */
static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
                                    struct shrinker *shrinker, int priority)
{
        unsigned long freed = 0;
        unsigned long long delta;
        long total_scan;
        long freeable;
        long nr;
        long new_nr;
        long batch_size = shrinker->batch ? shrinker->batch
                                          : SHRINK_BATCH;
        long scanned = 0, next_deferred;

        freeable = shrinker->count_objects(shrinker, shrinkctl);
        if (freeable == 0 || freeable == SHRINK_EMPTY)
                return freeable;

        /*
         * copy the current shrinker scan count into a local variable
         * and zero it so that other concurrent shrinker invocations
         * don't also do this scanning work.
         */
        nr = xchg_nr_deferred(shrinker, shrinkctl);

        if (shrinker->seeks) {
                /* New work: (freeable >> priority) * 4 / seeks. */
                delta = freeable >> priority;
                delta *= 4;
                do_div(delta, shrinker->seeks);
        } else {
                /*
                 * These objects don't require any IO to create. Trim
                 * them aggressively under memory pressure to keep
                 * them from causing refetches in the IO caches.
                 */
                delta = freeable / 2;
        }

        total_scan = nr >> priority;
        total_scan += delta;
        total_scan = min(total_scan, (2 * freeable));

        trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
                                   freeable, delta, total_scan, priority);

        /*
         * Normally, we should not scan less than batch_size objects in one
         * pass to avoid too frequent shrinker calls, but if the slab has less
         * than batch_size objects in total and we are really tight on memory,
         * we will try to reclaim all available objects, otherwise we can end
         * up failing allocations although there are plenty of reclaimable
         * objects spread over several slabs with usage less than the
         * batch_size.
         *
         * We detect the "tight on memory" situations by looking at the total
         * number of objects we want to scan (total_scan). If it is greater
         * than the total number of objects on slab (freeable), we must be
         * scanning at high prio and therefore should try to reclaim as much as
         * possible.
         */
        while (total_scan >= batch_size ||
               total_scan >= freeable) {
                unsigned long ret;
                unsigned long nr_to_scan = min(batch_size, total_scan);

                /*
                 * nr_scanned is preset to the request and read back after the
                 * call; presumably ->scan_objects() may adjust it - confirm
                 * against the shrinker API.
                 */
                shrinkctl->nr_to_scan = nr_to_scan;
                shrinkctl->nr_scanned = nr_to_scan;
                ret = shrinker->scan_objects(shrinker, shrinkctl);
                if (ret == SHRINK_STOP)
                        break;
                freed += ret;

                count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned);
                total_scan -= shrinkctl->nr_scanned;
                scanned += shrinkctl->nr_scanned;

                cond_resched();
        }

        /*
         * The deferred work is increased by any new work (delta) that wasn't
         * done, decreased by old deferred work that was done now.
         *
         * And it is capped to two times of the freeable items.
         */
        next_deferred = max_t(long, (nr + delta - scanned), 0);
        next_deferred = min(next_deferred, (2 * freeable));

        /*
         * move the unused scan count back into the shrinker in a
         * manner that handles concurrent updates.
         */
        new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl);

        trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan);
        return freed;
}
465 
466 #ifdef CONFIG_MEMCG
467 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
468                         struct mem_cgroup *memcg, int priority)
469 {
470         struct shrinker_info *info;
471         unsigned long ret, freed = 0;
472         int offset, index = 0;
473 
474         if (!mem_cgroup_online(memcg))
475                 return 0;
476 
477         /*
478          * lockless algorithm of memcg shrink.
479          *
480          * The shrinker_info may be freed asynchronously via RCU in the
481          * expand_one_shrinker_info(), so the rcu_read_lock() needs to be used
482          * to ensure the existence of the shrinker_info.
483          *
484          * The shrinker_info_unit is never freed unless its corresponding memcg
485          * is destroyed. Here we already hold the refcount of memcg, so the
486          * memcg will not be destroyed, and of course shrinker_info_unit will
487          * not be freed.
488          *
489          * So in the memcg shrink:
490          *  step 1: use rcu_read_lock() to guarantee existence of the
491          *          shrinker_info.
492          *  step 2: after getting shrinker_info_unit we can safely release the
493          *          RCU lock.
494          *  step 3: traverse the bitmap and calculate shrinker_id
495          *  step 4: use rcu_read_lock() to guarantee existence of the shrinker.
496          *  step 5: use shrinker_id to find the shrinker, then use
497          *          shrinker_try_get() to guarantee existence of the shrinker,
498          *          then we can release the RCU lock to do do_shrink_slab() that
499          *          may sleep.
500          *  step 6: do shrinker_put() paired with step 5 to put the refcount,
501          *          if the refcount reaches 0, then wake up the waiter in
502          *          shrinker_free() by calling complete().
503          *          Note: here is different from the global shrink, we don't
504          *                need to acquire the RCU lock to guarantee existence of
505          *                the shrinker, because we don't need to use this
506          *                shrinker to traverse the next shrinker in the bitmap.
507          *  step 7: we have already exited the read-side of rcu critical section
508          *          before calling do_shrink_slab(), the shrinker_info may be
509          *          released in expand_one_shrinker_info(), so go back to step 1
510          *          to reacquire the shrinker_info.
511          */
512 again:
513         rcu_read_lock();
514         info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
515         if (unlikely(!info))
516                 goto unlock;
517 
518         if (index < shrinker_id_to_index(info->map_nr_max)) {
519                 struct shrinker_info_unit *unit;
520 
521                 unit = info->unit[index];
522 
523                 rcu_read_unlock();
524 
525                 for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) {
526                         struct shrink_control sc = {
527                                 .gfp_mask = gfp_mask,
528                                 .nid = nid,
529                                 .memcg = memcg,
530                         };
531                         struct shrinker *shrinker;
532                         int shrinker_id = calc_shrinker_id(index, offset);
533 
534                         rcu_read_lock();
535                         shrinker = idr_find(&shrinker_idr, shrinker_id);
536                         if (unlikely(!shrinker || !shrinker_try_get(shrinker))) {
537                                 clear_bit(offset, unit->map);
538                                 rcu_read_unlock();
539                                 continue;
540                         }
541                         rcu_read_unlock();
542 
543                         /* Call non-slab shrinkers even though kmem is disabled */
544                         if (!memcg_kmem_online() &&
545                             !(shrinker->flags & SHRINKER_NONSLAB))
546                                 continue;
547 
548                         ret = do_shrink_slab(&sc, shrinker, priority);
549                         if (ret == SHRINK_EMPTY) {
550                                 clear_bit(offset, unit->map);
551                                 /*
552                                  * After the shrinker reported that it had no objects to
553                                  * free, but before we cleared the corresponding bit in
554                                  * the memcg shrinker map, a new object might have been
555                                  * added. To make sure, we have the bit set in this
556                                  * case, we invoke the shrinker one more time and reset
557                                  * the bit if it reports that it is not empty anymore.
558                                  * The memory barrier here pairs with the barrier in
559                                  * set_shrinker_bit():
560                                  *
561                                  * list_lru_add()     shrink_slab_memcg()
562                                  *   list_add_tail()    clear_bit()
563                                  *   <MB>               <MB>
564                                  *   set_bit()          do_shrink_slab()
565                                  */
566                                 smp_mb__after_atomic();
567                                 ret = do_shrink_slab(&sc, shrinker, priority);
568                                 if (ret == SHRINK_EMPTY)
569                                         ret = 0;
570                                 else
571                                         set_shrinker_bit(memcg, nid, shrinker_id);
572                         }
573                         freed += ret;
574                         shrinker_put(shrinker);
575                 }
576 
577                 index++;
578                 goto again;
579         }
580 unlock:
581         rcu_read_unlock();
582         return freed;
583 }
584 #else /* !CONFIG_MEMCG */
/* !CONFIG_MEMCG stub: there are no per-memcg shrinkers to run, so nothing is freed. */
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
                        struct mem_cgroup *memcg, int priority)
{
        return 0;
}
590 #endif /* CONFIG_MEMCG */
591 
/**
 * shrink_slab - shrink slab caches
 * @gfp_mask: allocation context
 * @nid: node whose slab caches to target
 * @memcg: memory cgroup whose slab caches to target
 * @priority: the reclaim priority
 *
 * Call the shrink functions to age shrinkable caches.
 *
 * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
 * unaware shrinkers will receive a node id of 0 instead.
 *
 * @memcg specifies the memory cgroup to target. Unaware shrinkers
 * are called only if it is the root cgroup.
 *
 * @priority is sc->priority, we take the number of objects and >> by priority
 * in order to get the scan target.
 *
 * Returns the number of reclaimed slab objects.
 */
unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
                          int priority)
{
        unsigned long ret, freed = 0;
        struct shrinker *shrinker;

        /*
         * The root memcg might be allocated even though memcg is disabled
         * via "cgroup_disable=memory" boot parameter.  This could make
         * mem_cgroup_is_root() return false, then just run memcg slab
         * shrink, but skip global shrink.  This may result in premature
         * oom.
         */
        if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
                return shrink_slab_memcg(gfp_mask, nid, memcg, priority);

        /*
         * lockless algorithm of global shrink.
         *
         * In the unregistration step, the shrinker will be freed asynchronously
         * via RCU after its refcount reaches 0. So both rcu_read_lock() and
         * shrinker_try_get() can be used to ensure the existence of the shrinker.
         *
         * So in the global shrink:
         *  step 1: use rcu_read_lock() to guarantee existence of the shrinker
         *          and the validity of the shrinker_list walk.
         *  step 2: use shrinker_try_get() to try get the refcount, if successful,
         *          then the existence of the shrinker can also be guaranteed,
         *          so we can release the RCU lock to do do_shrink_slab() that
         *          may sleep.
         *  step 3: *MUST* to reacquire the RCU lock before calling shrinker_put(),
         *          which ensures that neither this shrinker nor the next shrinker
         *          will be freed in the next traversal operation.
         *  step 4: do shrinker_put() paired with step 2 to put the refcount,
         *          if the refcount reaches 0, then wake up the waiter in
         *          shrinker_free() by calling complete().
         */
        rcu_read_lock();
        list_for_each_entry_rcu(shrinker, &shrinker_list, list) {
                struct shrink_control sc = {
                        .gfp_mask = gfp_mask,
                        .nid = nid,
                        .memcg = memcg,
                };

                /* No reference could be taken: skip this shrinker. */
                if (!shrinker_try_get(shrinker))
                        continue;

                rcu_read_unlock();

                ret = do_shrink_slab(&sc, shrinker, priority);
                /* SHRINK_EMPTY only matters to the memcg path; count it as 0 here. */
                if (ret == SHRINK_EMPTY)
                        ret = 0;
                freed += ret;

                rcu_read_lock();
                shrinker_put(shrinker);
        }

        rcu_read_unlock();
        cond_resched();
        return freed;
}
675 
676 struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...)
677 {
678         struct shrinker *shrinker;
679         unsigned int size;
680         va_list ap;
681         int err;
682 
683         shrinker = kzalloc(sizeof(struct shrinker), GFP_KERNEL);
684         if (!shrinker)
685                 return NULL;
686 
687         va_start(ap, fmt);
688         err = shrinker_debugfs_name_alloc(shrinker, fmt, ap);
689         va_end(ap);
690         if (err)
691                 goto err_name;
692 
693         shrinker->flags = flags | SHRINKER_ALLOCATED;
694         shrinker->seeks = DEFAULT_SEEKS;
695 
696         if (flags & SHRINKER_MEMCG_AWARE) {
697                 err = shrinker_memcg_alloc(shrinker);
698                 if (err == -ENOSYS) {
699                         /* Memcg is not supported, fallback to non-memcg-aware shrinker. */
700                         shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
701                         goto non_memcg;
702                 }
703 
704                 if (err)
705                         goto err_flags;
706 
707                 return shrinker;
708         }
709 
710 non_memcg:
711         /*
712          * The nr_deferred is available on per memcg level for memcg aware
713          * shrinkers, so only allocate nr_deferred in the following cases:
714          *  - non-memcg-aware shrinkers
715          *  - !CONFIG_MEMCG
716          *  - memcg is disabled by kernel command line
717          */
718         size = sizeof(*shrinker->nr_deferred);
719         if (flags & SHRINKER_NUMA_AWARE)
720                 size *= nr_node_ids;
721 
722         shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
723         if (!shrinker->nr_deferred)
724                 goto err_flags;
725 
726         return shrinker;
727 
728 err_flags:
729         shrinker_debugfs_name_free(shrinker);
730 err_name:
731         kfree(shrinker);
732         return NULL;
733 }
734 EXPORT_SYMBOL_GPL(shrinker_alloc);
735 
736 void shrinker_register(struct shrinker *shrinker)
737 {
738         if (unlikely(!(shrinker->flags & SHRINKER_ALLOCATED))) {
739                 pr_warn("Must use shrinker_alloc() to dynamically allocate the shrinker");
740                 return;
741         }
742 
743         mutex_lock(&shrinker_mutex);
744         list_add_tail_rcu(&shrinker->list, &shrinker_list);
745         shrinker->flags |= SHRINKER_REGISTERED;
746         shrinker_debugfs_add(shrinker);
747         mutex_unlock(&shrinker_mutex);
748 
749         init_completion(&shrinker->done);
750         /*
751          * Now the shrinker is fully set up, take the first reference to it to
752          * indicate that lookup operations are now allowed to use it via
753          * shrinker_try_get().
754          */
755         refcount_set(&shrinker->refcount, 1);
756 }
757 EXPORT_SYMBOL_GPL(shrinker_register);
758 
759 static void shrinker_free_rcu_cb(struct rcu_head *head)
760 {
761         struct shrinker *shrinker = container_of(head, struct shrinker, rcu);
762 
763         kfree(shrinker->nr_deferred);
764         kfree(shrinker);
765 }
766 
767 void shrinker_free(struct shrinker *shrinker)
768 {
769         struct dentry *debugfs_entry = NULL;
770         int debugfs_id;
771 
772         if (!shrinker)
773                 return;
774 
775         if (shrinker->flags & SHRINKER_REGISTERED) {
776                 /* drop the initial refcount */
777                 shrinker_put(shrinker);
778                 /*
779                  * Wait for all lookups of the shrinker to complete, after that,
780                  * no shrinker is running or will run again, then we can safely
781                  * free it asynchronously via RCU and safely free the structure
782                  * where the shrinker is located, such as super_block etc.
783                  */
784                 wait_for_completion(&shrinker->done);
785         }
786 
787         mutex_lock(&shrinker_mutex);
788         if (shrinker->flags & SHRINKER_REGISTERED) {
789                 /*
790                  * Now we can safely remove it from the shrinker_list and then
791                  * free it.
792                  */
793                 list_del_rcu(&shrinker->list);
794                 debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
795                 shrinker->flags &= ~SHRINKER_REGISTERED;
796         }
797 
798         shrinker_debugfs_name_free(shrinker);
799 
800         if (shrinker->flags & SHRINKER_MEMCG_AWARE)
801                 shrinker_memcg_remove(shrinker);
802         mutex_unlock(&shrinker_mutex);
803 
804         if (debugfs_entry)
805                 shrinker_debugfs_remove(debugfs_entry, debugfs_id);
806 
807         call_rcu(&shrinker->rcu, shrinker_free_rcu_cb);
808 }
809 EXPORT_SYMBOL_GPL(shrinker_free);
810 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php