Linux/lib/stackdepot.c

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Stack depot - a stack trace storage that avoids duplication.
  4  *
  5  * Internally, stack depot maintains a hash table of unique stacktraces. The
  6  * stack traces themselves are stored contiguously one after another in a set
  7  * of separate page allocations.
  8  *
  9  * Author: Alexander Potapenko <glider@google.com>
 10  * Copyright (C) 2016 Google, Inc.
 11  *
 12  * Based on the code by Dmitry Chernenkov.
 13  */
 14 
 15 #define pr_fmt(fmt) "stackdepot: " fmt
 16 
 17 #include <linux/debugfs.h>
 18 #include <linux/gfp.h>
 19 #include <linux/jhash.h>
 20 #include <linux/kernel.h>
 21 #include <linux/kmsan.h>
 22 #include <linux/list.h>
 23 #include <linux/mm.h>
 24 #include <linux/mutex.h>
 25 #include <linux/poison.h>
 26 #include <linux/printk.h>
 27 #include <linux/rculist.h>
 28 #include <linux/rcupdate.h>
 29 #include <linux/refcount.h>
 30 #include <linux/slab.h>
 31 #include <linux/spinlock.h>
 32 #include <linux/stacktrace.h>
 33 #include <linux/stackdepot.h>
 34 #include <linux/string.h>
 35 #include <linux/types.h>
 36 #include <linux/memblock.h>
 37 #include <linux/kasan-enabled.h>
 38 
 39 #define DEPOT_POOLS_CAP 8192
 40 /* The pool_index is offset by 1 so the first record does not have a 0 handle. */
 41 #define DEPOT_MAX_POOLS \
 42         (((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \
 43          (1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP)
 44 
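A depot handle packs a pool index, an offset within that pool, and a few caller-owned extra bits into one 32-bit value; the index is stored plus one so that a handle of all-zero bits can serve as the "no stack" sentinel, which is what the comment above DEPOT_MAX_POOLS refers to. Below is a minimal, self-contained userspace model of that packing. The MODEL_* bit widths and alignment are illustrative assumptions, not the real DEPOT_* constants derived in <linux/stackdepot.h>.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative bit widths only; the kernel derives its own from PAGE_SHIFT etc. */
#define MODEL_POOL_INDEX_BITS	16
#define MODEL_OFFSET_BITS	11
#define MODEL_EXTRA_BITS	5
#define MODEL_STACK_ALIGN	4	/* offsets stored in 16-byte units */

union model_handle {
	uint32_t handle;
	struct {
		uint32_t pool_index_plus_1 : MODEL_POOL_INDEX_BITS;
		uint32_t offset            : MODEL_OFFSET_BITS;
		uint32_t extra             : MODEL_EXTRA_BITS;
	};
};

static uint32_t model_encode(uint32_t pool_index, size_t byte_offset)
{
	union model_handle h = { .handle = 0 };

	h.pool_index_plus_1 = pool_index + 1;		/* +1 keeps handle 0 free as "none" */
	h.offset = byte_offset >> MODEL_STACK_ALIGN;	/* offset in aligned units */
	return h.handle;
}

int main(void)
{
	union model_handle h = { .handle = model_encode(0, 64) };

	assert(h.handle != 0);	/* even pool 0 at offset 64 yields a non-zero handle */
	printf("pool %u, byte offset %u\n",
	       (unsigned)(h.pool_index_plus_1 - 1),
	       (unsigned)(h.offset << MODEL_STACK_ALIGN));
	return 0;
}
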
 45 static bool stack_depot_disabled;
 46 static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
 47 static bool __stack_depot_early_init_passed __initdata;
 48 
 49 /* Use one hash table bucket per 16 KB of memory. */
 50 #define STACK_HASH_TABLE_SCALE 14
 51 /* Limit the number of buckets between 4K and 1M. */
 52 #define STACK_BUCKET_NUMBER_ORDER_MIN 12
 53 #define STACK_BUCKET_NUMBER_ORDER_MAX 20
 54 /* Initial seed for jhash2. */
 55 #define STACK_HASH_SEED 0x9747b28c
 56 
 57 /* Hash table of stored stack records. */
 58 static struct list_head *stack_table;
 59 /* Fixed order of the number of table buckets. Used when KASAN is enabled. */
 60 static unsigned int stack_bucket_number_order;
 61 /* Hash mask for indexing the table. */
 62 static unsigned int stack_hash_mask;
 63 
 64 /* Array of memory regions that store stack records. */
 65 static void *stack_pools[DEPOT_MAX_POOLS];
 66 /* Newly allocated pool that is not yet added to stack_pools. */
 67 static void *new_pool;
 68 /* Number of pools in stack_pools. */
 69 static int pools_num;
 70 /* Offset to the unused space in the currently used pool. */
 71 static size_t pool_offset = DEPOT_POOL_SIZE;
 72 /* Freelist of stack records within stack_pools. */
 73 static LIST_HEAD(free_stacks);
 74 /* The lock must be held when performing pool or freelist modifications. */
 75 static DEFINE_RAW_SPINLOCK(pool_lock);
 76 
 77 /* Statistics counters for debugfs. */
 78 enum depot_counter_id {
 79         DEPOT_COUNTER_REFD_ALLOCS,
 80         DEPOT_COUNTER_REFD_FREES,
 81         DEPOT_COUNTER_REFD_INUSE,
 82         DEPOT_COUNTER_FREELIST_SIZE,
 83         DEPOT_COUNTER_PERSIST_COUNT,
 84         DEPOT_COUNTER_PERSIST_BYTES,
 85         DEPOT_COUNTER_COUNT,
 86 };
 87 static long counters[DEPOT_COUNTER_COUNT];
 88 static const char *const counter_names[] = {
 89         [DEPOT_COUNTER_REFD_ALLOCS]     = "refcounted_allocations",
 90         [DEPOT_COUNTER_REFD_FREES]      = "refcounted_frees",
 91         [DEPOT_COUNTER_REFD_INUSE]      = "refcounted_in_use",
 92         [DEPOT_COUNTER_FREELIST_SIZE]   = "freelist_size",
 93         [DEPOT_COUNTER_PERSIST_COUNT]   = "persistent_count",
 94         [DEPOT_COUNTER_PERSIST_BYTES]   = "persistent_bytes",
 95 };
 96 static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT);
 97 
 98 static int __init disable_stack_depot(char *str)
 99 {
100         return kstrtobool(str, &stack_depot_disabled);
101 }
102 early_param("stack_depot_disable", disable_stack_depot);
103 
104 void __init stack_depot_request_early_init(void)
105 {
106         /* Too late to request early init now. */
107         WARN_ON(__stack_depot_early_init_passed);
108 
109         __stack_depot_early_init_requested = true;
110 }
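
stack_depot_request_early_init() only has an effect before mm_init() runs stack_depot_early_init(), so a subsystem that already knows at boot-parameter time that it will need the depot typically requests early init from its own early_param() handler. A hypothetical subsystem-side sketch follows; the "mydbg" name and flag are invented for illustration, only the stack depot call is real.

/* Hypothetical subsystem; "mydbg" is an invented boot parameter. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/stackdepot.h>

static bool mydbg_enabled __initdata;

static int __init mydbg_setup(char *str)
{
	int ret = kstrtobool(str, &mydbg_enabled);

	if (!ret && mydbg_enabled)
		/* Ask for the depot hash table to be allocated from mm_init(). */
		stack_depot_request_early_init();
	return ret;
}
early_param("mydbg", mydbg_setup);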
111 
112 /* Initialize list_head's within the hash table. */
113 static void init_stack_table(unsigned long entries)
114 {
115         unsigned long i;
116 
117         for (i = 0; i < entries; i++)
118                 INIT_LIST_HEAD(&stack_table[i]);
119 }
120 
121 /* Allocates a hash table via memblock. Can only be used during early boot. */
122 int __init stack_depot_early_init(void)
123 {
124         unsigned long entries = 0;
125 
126         /* This function must be called only once, from mm_init(). */
127         if (WARN_ON(__stack_depot_early_init_passed))
128                 return 0;
129         __stack_depot_early_init_passed = true;
130 
131         /*
132          * Print disabled message even if early init has not been requested:
133          * stack_depot_init() will not print one.
134          */
135         if (stack_depot_disabled) {
136                 pr_info("disabled\n");
137                 return 0;
138         }
139 
140         /*
141          * If KASAN is enabled, use the maximum order: KASAN is frequently used
142          * in fuzzing scenarios, which leads to a large number of different
143          * stack traces being stored in stack depot.
144          */
145         if (kasan_enabled() && !stack_bucket_number_order)
146                 stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX;
147 
148         /*
149          * Check if early init has been requested after setting
150          * stack_bucket_number_order: stack_depot_init() uses its value.
151          */
152         if (!__stack_depot_early_init_requested)
153                 return 0;
154 
155         /*
156          * If stack_bucket_number_order is not set, leave entries as 0 to rely
157          * on the automatic calculations performed by alloc_large_system_hash().
158          */
159         if (stack_bucket_number_order)
160                 entries = 1UL << stack_bucket_number_order;
161         pr_info("allocating hash table via alloc_large_system_hash\n");
162         stack_table = alloc_large_system_hash("stackdepot",
163                                                 sizeof(struct list_head),
164                                                 entries,
165                                                 STACK_HASH_TABLE_SCALE,
166                                                 HASH_EARLY,
167                                                 NULL,
168                                                 &stack_hash_mask,
169                                                 1UL << STACK_BUCKET_NUMBER_ORDER_MIN,
170                                                 1UL << STACK_BUCKET_NUMBER_ORDER_MAX);
171         if (!stack_table) {
172                 pr_err("hash table allocation failed, disabling\n");
173                 stack_depot_disabled = true;
174                 return -ENOMEM;
175         }
176         if (!entries) {
177                 /*
178                  * Obtain the number of entries that was calculated by
179                  * alloc_large_system_hash().
180                  */
181                 entries = stack_hash_mask + 1;
182         }
183         init_stack_table(entries);
184 
185         return 0;
186 }
187 
188 /* Allocates a hash table via kvcalloc. Can be used after boot. */
189 int stack_depot_init(void)
190 {
191         static DEFINE_MUTEX(stack_depot_init_mutex);
192         unsigned long entries;
193         int ret = 0;
194 
195         mutex_lock(&stack_depot_init_mutex);
196 
197         if (stack_depot_disabled || stack_table)
198                 goto out_unlock;
199 
200         /*
201          * Similarly to stack_depot_early_init, use stack_bucket_number_order
202          * if assigned, and rely on automatic scaling otherwise.
203          */
204         if (stack_bucket_number_order) {
205                 entries = 1UL << stack_bucket_number_order;
206         } else {
207                 int scale = STACK_HASH_TABLE_SCALE;
208 
209                 entries = nr_free_buffer_pages();
210                 entries = roundup_pow_of_two(entries);
211 
212                 if (scale > PAGE_SHIFT)
213                         entries >>= (scale - PAGE_SHIFT);
214                 else
215                         entries <<= (PAGE_SHIFT - scale);
216         }
217 
218         if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN)
219                 entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN;
220         if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX)
221                 entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX;
222 
223         pr_info("allocating hash table of %lu entries via kvcalloc\n", entries);
224         stack_table = kvcalloc(entries, sizeof(struct list_head), GFP_KERNEL);
225         if (!stack_table) {
226                 pr_err("hash table allocation failed, disabling\n");
227                 stack_depot_disabled = true;
228                 ret = -ENOMEM;
229                 goto out_unlock;
230         }
231         stack_hash_mask = entries - 1;
232         init_stack_table(entries);
233 
234 out_unlock:
235         mutex_unlock(&stack_depot_init_mutex);
236 
237         return ret;
238 }
239 EXPORT_SYMBOL_GPL(stack_depot_init);
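
The sizing logic above allocates roughly one bucket per 16 KB (2^STACK_HASH_TABLE_SCALE bytes) of free buffer pages, clamped to between 2^STACK_BUCKET_NUMBER_ORDER_MIN and 2^STACK_BUCKET_NUMBER_ORDER_MAX buckets. The following is a standalone userspace model of that arithmetic, assuming 4 KB pages and an example memory size; it mirrors the shifts and clamps, not the kernel's actual nr_free_buffer_pages() accounting.

#include <stdio.h>

#define MODEL_PAGE_SHIFT	12	/* assume 4 KB pages */
#define STACK_HASH_TABLE_SCALE	14	/* one bucket per 16 KB */
#define BUCKET_ORDER_MIN	12
#define BUCKET_ORDER_MAX	20

static unsigned long round_up_pow2(unsigned long x)
{
	unsigned long r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

static unsigned long model_bucket_count(unsigned long free_pages)
{
	unsigned long entries = round_up_pow2(free_pages);

	/* Convert a page count into one bucket per 16 KB of memory. */
	if (STACK_HASH_TABLE_SCALE > MODEL_PAGE_SHIFT)
		entries >>= (STACK_HASH_TABLE_SCALE - MODEL_PAGE_SHIFT);
	else
		entries <<= (MODEL_PAGE_SHIFT - STACK_HASH_TABLE_SCALE);

	if (entries < 1UL << BUCKET_ORDER_MIN)
		entries = 1UL << BUCKET_ORDER_MIN;
	if (entries > 1UL << BUCKET_ORDER_MAX)
		entries = 1UL << BUCKET_ORDER_MAX;
	return entries;
}

int main(void)
{
	/* ~8 GB of free buffer pages: 2^21 pages -> 2^19 buckets. */
	unsigned long pages = (8UL << 30) >> MODEL_PAGE_SHIFT;

	printf("%lu free pages -> %lu hash buckets\n", pages, model_bucket_count(pages));
	return 0;
}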
240 
241 /*
242  * Initializes new stack pool, and updates the list of pools.
243  */
244 static bool depot_init_pool(void **prealloc)
245 {
246         lockdep_assert_held(&pool_lock);
247 
248         if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
249                 /* Bail out if we reached the pool limit. */
250                 WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */
251                 WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */
252                 WARN_ONCE(1, "Stack depot reached limit capacity");
253                 return false;
254         }
255 
256         if (!new_pool && *prealloc) {
257                 /* We have preallocated memory, use it. */
258                 WRITE_ONCE(new_pool, *prealloc);
259                 *prealloc = NULL;
260         }
261 
262         if (!new_pool)
263                 return false; /* new_pool and *prealloc are NULL */
264 
265         /* Save reference to the pool to be used by depot_fetch_stack(). */
266         stack_pools[pools_num] = new_pool;
267 
268         /*
269          * Stack depot tries to keep an extra pool allocated even before it runs
270          * out of space in the currently used pool.
271          *
272          * To indicate that a new preallocation is needed new_pool is reset to
273          * NULL; do not reset to NULL if we have reached the maximum number of
274          * pools.
275          */
276         if (pools_num < DEPOT_MAX_POOLS)
277                 WRITE_ONCE(new_pool, NULL);
278         else
279                 WRITE_ONCE(new_pool, STACK_DEPOT_POISON);
280 
281         /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */
282         WRITE_ONCE(pools_num, pools_num + 1);
283         ASSERT_EXCLUSIVE_WRITER(pools_num);
284 
285         pool_offset = 0;
286 
287         return true;
288 }
289 
290 /* Keeps the preallocated memory to be used for a new stack depot pool. */
291 static void depot_keep_new_pool(void **prealloc)
292 {
293         lockdep_assert_held(&pool_lock);
294 
295         /*
296          * If a new pool is already saved or the maximum number of
297          * pools is reached, do not use the preallocated memory.
298          */
299         if (new_pool)
300                 return;
301 
302         WRITE_ONCE(new_pool, *prealloc);
303         *prealloc = NULL;
304 }
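
Together, depot_init_pool() and depot_keep_new_pool() implement a common pattern for feeding memory into a structure guarded by a raw spinlock: allocate outside the lock, hand the buffer over (or stash it as a spare) under the lock, and free any leftover after unlocking. The sketch below is a stripped-down, generic kernel-style version of that pattern with invented names; it is not the depot's own code.

/* Generic preallocate-outside-the-lock sketch; names are illustrative. */
#include <linux/gfp.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(cache_lock);
static void *spare;	/* one stashed buffer, consumed under cache_lock */

static void cache_refill(gfp_t gfp)
{
	unsigned long flags;
	void *prealloc = NULL;

	/* Allocate before taking the raw spinlock: no allocations under it. */
	if (!READ_ONCE(spare))
		prealloc = (void *)__get_free_page(gfp);

	raw_spin_lock_irqsave(&cache_lock, flags);
	if (!spare) {
		/* Hand the buffer over; clearing prealloc marks it as consumed. */
		spare = prealloc;
		prealloc = NULL;
	}
	raw_spin_unlock_irqrestore(&cache_lock, flags);

	/* Someone else refilled the spare first: give the page back. */
	if (prealloc)
		free_page((unsigned long)prealloc);
}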
305 
306 /*
307  * Try to initialize a new stack record from the current pool, a cached pool, or
308  * the current pre-allocation.
309  */
310 static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
311 {
312         struct stack_record *stack;
313         void *current_pool;
314         u32 pool_index;
315 
316         lockdep_assert_held(&pool_lock);
317 
318         if (pool_offset + size > DEPOT_POOL_SIZE) {
319                 if (!depot_init_pool(prealloc))
320                         return NULL;
321         }
322 
323         if (WARN_ON_ONCE(pools_num < 1))
324                 return NULL;
325         pool_index = pools_num - 1;
326         current_pool = stack_pools[pool_index];
327         if (WARN_ON_ONCE(!current_pool))
328                 return NULL;
329 
330         stack = current_pool + pool_offset;
331 
332         /* Pre-initialize handle once. */
333         stack->handle.pool_index_plus_1 = pool_index + 1;
334         stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
335         stack->handle.extra = 0;
336         INIT_LIST_HEAD(&stack->hash_list);
337 
338         pool_offset += size;
339 
340         return stack;
341 }
342 
343 /* Try to find next free usable entry from the freelist. */
344 static struct stack_record *depot_pop_free(void)
345 {
346         struct stack_record *stack;
347 
348         lockdep_assert_held(&pool_lock);
349 
350         if (list_empty(&free_stacks))
351                 return NULL;
352 
353         /*
354          * We maintain the invariant that the elements in front are least
355          * recently used, and are therefore more likely to be associated with an
356          * RCU grace period in the past. Consequently it is sufficient to only
357          * check the first entry.
358          */
359         stack = list_first_entry(&free_stacks, struct stack_record, free_list);
360         if (!poll_state_synchronize_rcu(stack->rcu_state))
361                 return NULL;
362 
363         list_del(&stack->free_list);
364         counters[DEPOT_COUNTER_FREELIST_SIZE]--;
365 
366         return stack;
367 }
368 
369 static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries)
370 {
371         const size_t used = flex_array_size(s, entries, nr_entries);
372         const size_t unused = sizeof(s->entries) - used;
373 
374         WARN_ON_ONCE(sizeof(s->entries) < used);
375 
376         return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN);
377 }
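
depot_stack_record_size() shrinks the fixed-size struct stack_record by its unused trailing entries and rounds the result up to the pool alignment. The following is a self-contained model of that arithmetic with a made-up record layout; the placeholder header and frame count are assumptions standing in for the real struct declared in the stack depot header and CONFIG_STACKDEPOT_MAX_FRAMES.

#include <stddef.h>
#include <stdio.h>

#define MODEL_MAX_FRAMES	64	/* stands in for CONFIG_STACKDEPOT_MAX_FRAMES */
#define MODEL_STACK_ALIGN	4	/* stands in for DEPOT_STACK_ALIGN */
#define ALIGN_UP(x, a)		(((x) + (a) - 1) & ~((size_t)(a) - 1))

struct model_record {
	unsigned long header[6];			/* placeholder for hash/size/handle/list fields */
	unsigned long entries[MODEL_MAX_FRAMES];	/* full-size trace buffer */
};

static size_t model_record_size(unsigned int nr_entries)
{
	size_t used   = nr_entries * sizeof(unsigned long);
	size_t unused = sizeof(((struct model_record *)0)->entries) - used;

	/* Drop the unused tail, then round up to the pool's record alignment. */
	return ALIGN_UP(sizeof(struct model_record) - unused, (size_t)1 << MODEL_STACK_ALIGN);
}

int main(void)
{
	printf("10-frame record: %zu bytes\n", model_record_size(10));
	printf("64-frame record: %zu bytes\n", model_record_size(64));
	return 0;
}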
378 
379 /* Allocates a new stack in a stack depot pool. */
380 static struct stack_record *
381 depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc)
382 {
383         struct stack_record *stack = NULL;
384         size_t record_size;
385 
386         lockdep_assert_held(&pool_lock);
387 
388         /* This should already be checked by public API entry points. */
389         if (WARN_ON_ONCE(!nr_entries))
390                 return NULL;
391 
392         /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
393         if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES)
394                 nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES;
395 
396         if (flags & STACK_DEPOT_FLAG_GET) {
397                 /*
398                  * Evictable entries have to allocate the max. size so they may
399                  * safely be re-used by differently sized allocations.
400                  */
401                 record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES);
402                 stack = depot_pop_free();
403         } else {
404                 record_size = depot_stack_record_size(stack, nr_entries);
405         }
406 
407         if (!stack) {
408                 stack = depot_pop_free_pool(prealloc, record_size);
409                 if (!stack)
410                         return NULL;
411         }
412 
413         /* Save the stack trace. */
414         stack->hash = hash;
415         stack->size = nr_entries;
416         /* stack->handle is already filled in by depot_pop_free_pool(). */
417         memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries));
418 
419         if (flags & STACK_DEPOT_FLAG_GET) {
420                 refcount_set(&stack->count, 1);
421                 counters[DEPOT_COUNTER_REFD_ALLOCS]++;
422                 counters[DEPOT_COUNTER_REFD_INUSE]++;
423         } else {
424                 /* Warn on attempts to switch to refcounting this entry. */
425                 refcount_set(&stack->count, REFCOUNT_SATURATED);
426                 counters[DEPOT_COUNTER_PERSIST_COUNT]++;
427                 counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size;
428         }
429 
430         /*
431          * Let KMSAN know the stored stack record is initialized. This shall
432          * prevent false positive reports if instrumented code accesses it.
433          */
434         kmsan_unpoison_memory(stack, record_size);
435 
436         return stack;
437 }
438 
439 static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
440 {
441         const int pools_num_cached = READ_ONCE(pools_num);
442         union handle_parts parts = { .handle = handle };
443         void *pool;
444         u32 pool_index = parts.pool_index_plus_1 - 1;
445         size_t offset = parts.offset << DEPOT_STACK_ALIGN;
446         struct stack_record *stack;
447 
448         lockdep_assert_not_held(&pool_lock);
449 
450         if (pool_index >= pools_num_cached) {
451                 WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
452                      pool_index, pools_num_cached, handle);
453                 return NULL;
454         }
455 
456         pool = stack_pools[pool_index];
457         if (WARN_ON(!pool))
458                 return NULL;
459 
460         stack = pool + offset;
461         if (WARN_ON(!refcount_read(&stack->count)))
462                 return NULL;
463 
464         return stack;
465 }
466 
467 /* Links stack into the freelist. */
468 static void depot_free_stack(struct stack_record *stack)
469 {
470         unsigned long flags;
471 
472         lockdep_assert_not_held(&pool_lock);
473 
474         raw_spin_lock_irqsave(&pool_lock, flags);
475         printk_deferred_enter();
476 
477         /*
478          * Remove the entry from the hash list. Concurrent list traversal may
479          * still observe the entry, but since the refcount is zero, this entry
480          * will no longer be considered as valid.
481          */
482         list_del_rcu(&stack->hash_list);
483 
484         /*
485          * Due to being used from constrained contexts such as the allocators,
486          * NMI, or even RCU itself, stack depot cannot rely on primitives that
487          * would sleep (such as synchronize_rcu()) or recursively call into
488          * stack depot again (such as call_rcu()).
489          *
490          * Instead, get an RCU cookie, so that we can ensure this entry isn't
491          * moved onto another list until the next grace period, and concurrent
492          * RCU list traversal remains safe.
493          */
494         stack->rcu_state = get_state_synchronize_rcu();
495 
496         /*
497          * Add the entry to the freelist tail, so that older entries are
498          * considered first - their RCU cookie is more likely to no longer be
499          * associated with the current grace period.
500          */
501         list_add_tail(&stack->free_list, &free_stacks);
502 
503         counters[DEPOT_COUNTER_FREELIST_SIZE]++;
504         counters[DEPOT_COUNTER_REFD_FREES]++;
505         counters[DEPOT_COUNTER_REFD_INUSE]--;
506 
507         printk_deferred_exit();
508         raw_spin_unlock_irqrestore(&pool_lock, flags);
509 }
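
Because depot_free_stack() can run from allocators, NMI context, or RCU itself, it cannot wait for a grace period or schedule a callback; it stores a grace-period cookie instead, and depot_pop_free() earlier in this file only reuses an entry once poll_state_synchronize_rcu() confirms that cookie's grace period has elapsed. A minimal kernel-style sketch of the same cookie pattern for a generic object cache follows; the types and names are illustrative, not depot code.

/* Illustrative object cache using RCU grace-period cookies. */
#include <linux/list.h>
#include <linux/rcupdate.h>

struct cached_obj {
	struct list_head free_list;
	unsigned long rcu_state;	/* cookie from get_state_synchronize_rcu() */
};

/* Oldest entries at the front; callers serialize access with their own lock. */
static LIST_HEAD(obj_freelist);

/* Retire an object: readers may still see it until a grace period passes. */
static void obj_retire(struct cached_obj *obj)
{
	obj->rcu_state = get_state_synchronize_rcu();
	list_add_tail(&obj->free_list, &obj_freelist);
}

/* Reuse an object only if its grace period has already elapsed. */
static struct cached_obj *obj_reuse(void)
{
	struct cached_obj *obj;

	if (list_empty(&obj_freelist))
		return NULL;

	obj = list_first_entry(&obj_freelist, struct cached_obj, free_list);
	if (!poll_state_synchronize_rcu(obj->rcu_state))
		return NULL;	/* the oldest entry is not safe yet, so none are */

	list_del(&obj->free_list);
	return obj;
}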
510 
511 /* Calculates the hash for a stack. */
512 static inline u32 hash_stack(unsigned long *entries, unsigned int size)
513 {
514         return jhash2((u32 *)entries,
515                       array_size(size,  sizeof(*entries)) / sizeof(u32),
516                       STACK_HASH_SEED);
517 }
518 
519 /*
520  * Non-instrumented version of memcmp().
521  * Does not check the lexicographical order, only the equality.
522  */
523 static inline
524 int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
525                         unsigned int n)
526 {
527         for ( ; n-- ; u1++, u2++) {
528                 if (*u1 != *u2)
529                         return 1;
530         }
531         return 0;
532 }
533 
534 /* Finds a stack in a bucket of the hash table. */
535 static inline struct stack_record *find_stack(struct list_head *bucket,
536                                               unsigned long *entries, int size,
537                                               u32 hash, depot_flags_t flags)
538 {
539         struct stack_record *stack, *ret = NULL;
540 
541         /*
542          * Stack depot may be used from instrumentation that instruments RCU or
543          * tracing itself; use variant that does not call into RCU and cannot be
544          * traced.
545          *
546          * Note: Such use cases must take care when using refcounting to evict
547          * unused entries, because the stack record free-then-reuse code paths
548          * do call into RCU.
549          */
550         rcu_read_lock_sched_notrace();
551 
552         list_for_each_entry_rcu(stack, bucket, hash_list) {
553                 if (stack->hash != hash || stack->size != size)
554                         continue;
555 
556                 /*
557                  * This may race with depot_free_stack() accessing the freelist
558                  * management state unioned with @entries. The refcount is zero
559                  * in that case and the below refcount_inc_not_zero() will fail.
560                  */
561                 if (data_race(stackdepot_memcmp(entries, stack->entries, size)))
562                         continue;
563 
564                 /*
565                  * Try to increment refcount. If this succeeds, the stack record
566                  * is valid and has not yet been freed.
567                  *
568                  * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior
569                  * to then call stack_depot_put() later, and we can assume that
570                  * a stack record is never placed back on the freelist.
571                  */
572                 if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count))
573                         continue;
574 
575                 ret = stack;
576                 break;
577         }
578 
579         rcu_read_unlock_sched_notrace();
580 
581         return ret;
582 }
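
find_stack() is an instance of the standard lockless-lookup idiom: walk an RCU-protected hash list without taking the pool lock, and take a reference with refcount_inc_not_zero() so that an entry racing with depot_free_stack() (refcount already zero) is simply skipped. A generic kernel-style sketch of that idiom follows, with invented types; the depot itself uses the _sched_notrace RCU variants so the lookup stays safe under tracing instrumentation.

/* Generic RCU lookup plus conditional refcount; names are illustrative. */
#include <linux/rculist.h>
#include <linux/refcount.h>

struct bucket_obj {
	struct list_head hash_list;
	refcount_t count;
	unsigned long key;
};

static struct bucket_obj *bucket_lookup_get(struct list_head *bucket, unsigned long key)
{
	struct bucket_obj *obj, *ret = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(obj, bucket, hash_list) {
		if (obj->key != key)
			continue;
		/* A zero refcount means the object is being freed: skip it. */
		if (!refcount_inc_not_zero(&obj->count))
			continue;
		ret = obj;	/* caller drops the reference when done */
		break;
	}
	rcu_read_unlock();

	return ret;
}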
583 
584 depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
585                                             unsigned int nr_entries,
586                                             gfp_t alloc_flags,
587                                             depot_flags_t depot_flags)
588 {
589         struct list_head *bucket;
590         struct stack_record *found = NULL;
591         depot_stack_handle_t handle = 0;
592         struct page *page = NULL;
593         void *prealloc = NULL;
594         bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC;
595         unsigned long flags;
596         u32 hash;
597 
598         if (WARN_ON(depot_flags & ~STACK_DEPOT_FLAGS_MASK))
599                 return 0;
600 
601         /*
602          * If this stack trace is from an interrupt, including anything before
603          * interrupt entry usually leads to unbounded stack depot growth.
604          *
605          * Since use of filter_irq_stacks() is a requirement to ensure stack
606          * depot can efficiently deduplicate interrupt stacks, always
607          * filter_irq_stacks() to simplify all callers' use of stack depot.
608          */
609         nr_entries = filter_irq_stacks(entries, nr_entries);
610 
611         if (unlikely(nr_entries == 0) || stack_depot_disabled)
612                 return 0;
613 
614         hash = hash_stack(entries, nr_entries);
615         bucket = &stack_table[hash & stack_hash_mask];
616 
617         /* Fast path: look the stack trace up without locking. */
618         found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
619         if (found)
620                 goto exit;
621 
622         /*
623          * Allocate memory for a new pool if required now:
624          * we won't be able to do that under the lock.
625          */
626         if (unlikely(can_alloc && !READ_ONCE(new_pool))) {
627                 page = alloc_pages(gfp_nested_mask(alloc_flags),
628                                    DEPOT_POOL_ORDER);
629                 if (page)
630                         prealloc = page_address(page);
631         }
632 
633         raw_spin_lock_irqsave(&pool_lock, flags);
634         printk_deferred_enter();
635 
636         /* Try to find again, to avoid concurrently inserting duplicates. */
637         found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
638         if (!found) {
639                 struct stack_record *new =
640                         depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc);
641 
642                 if (new) {
643                         /*
644                          * This releases the stack record into the bucket and
645                          * makes it visible to readers in find_stack().
646                          */
647                         list_add_rcu(&new->hash_list, bucket);
648                         found = new;
649                 }
650         }
651 
652         if (prealloc) {
653                 /*
654                  * Either stack depot already contains this stack trace, or
655                  * depot_alloc_stack() did not consume the preallocated memory.
656                  * Try to keep the preallocated memory for future.
657                  */
658                 depot_keep_new_pool(&prealloc);
659         }
660 
661         printk_deferred_exit();
662         raw_spin_unlock_irqrestore(&pool_lock, flags);
663 exit:
664         if (prealloc) {
665                 /* Stack depot didn't use this memory, free it. */
666                 free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER);
667         }
668         if (found)
669                 handle = found->handle.handle;
670         return handle;
671 }
672 EXPORT_SYMBOL_GPL(stack_depot_save_flags);
673 
674 depot_stack_handle_t stack_depot_save(unsigned long *entries,
675                                       unsigned int nr_entries,
676                                       gfp_t alloc_flags)
677 {
678         return stack_depot_save_flags(entries, nr_entries, alloc_flags,
679                                       STACK_DEPOT_FLAG_CAN_ALLOC);
680 }
681 EXPORT_SYMBOL_GPL(stack_depot_save);
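
For the common persistent (non-evictable) use case, a caller captures a trace with stack_trace_save(), stores it once with stack_depot_save(), and keeps only the 32-bit handle. The sketch below shows that flow; the wrapper functions and the 16-frame buffer size are hypothetical, only the stacktrace and stack depot calls are real APIs.

/* Hypothetical caller of the persistent stack depot API. */
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

static depot_stack_handle_t record_current_stack(gfp_t gfp)
{
	unsigned long entries[16];
	unsigned int nr_entries;

	/* Capture up to 16 frames of the current call chain, skipping none. */
	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);

	/* Deduplicate and store; returns 0 if storage failed or is disabled. */
	return stack_depot_save(entries, nr_entries, gfp);
}

static void report_recorded_stack(depot_stack_handle_t handle)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(handle, &entries);
	if (nr_entries)
		stack_trace_print(entries, nr_entries, 0);	/* or stack_depot_print(handle) */
}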
682 
683 struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle)
684 {
685         if (!handle)
686                 return NULL;
687 
688         return depot_fetch_stack(handle);
689 }
690 
691 unsigned int stack_depot_fetch(depot_stack_handle_t handle,
692                                unsigned long **entries)
693 {
694         struct stack_record *stack;
695 
696         *entries = NULL;
697         /*
698          * Let KMSAN know *entries is initialized. This shall prevent false
699          * positive reports if instrumented code accesses it.
700          */
701         kmsan_unpoison_memory(entries, sizeof(*entries));
702 
703         if (!handle || stack_depot_disabled)
704                 return 0;
705 
706         stack = depot_fetch_stack(handle);
707         /*
708          * Should never be NULL, otherwise this is a use-after-put (or just a
709          * corrupt handle).
710          */
711         if (WARN(!stack, "corrupt handle or use after stack_depot_put()"))
712                 return 0;
713 
714         *entries = stack->entries;
715         return stack->size;
716 }
717 EXPORT_SYMBOL_GPL(stack_depot_fetch);
718 
719 void stack_depot_put(depot_stack_handle_t handle)
720 {
721         struct stack_record *stack;
722 
723         if (!handle || stack_depot_disabled)
724                 return;
725 
726         stack = depot_fetch_stack(handle);
727         /*
728          * Should always be able to find the stack record, otherwise this is an
729          * unbalanced put attempt (or corrupt handle).
730          */
731         if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()"))
732                 return;
733 
734         if (refcount_dec_and_test(&stack->count))
735                 depot_free_stack(stack);
736 }
737 EXPORT_SYMBOL_GPL(stack_depot_put);
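
Evictable records pair stack_depot_save_flags(..., STACK_DEPOT_FLAG_GET) with stack_depot_put(): every successful save or lookup made with the GET flag takes a reference, and the final put moves the record onto the freelist. A sketch of the acquire/release pairing follows; the wrapper functions are hypothetical, the flags and depot calls are the ones defined by this file and its header.

/* Hypothetical wrappers around the refcounted (evictable) stack depot API. */
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

static depot_stack_handle_t tag_alloc_site(gfp_t gfp)
{
	unsigned long entries[32];
	unsigned int nr = stack_trace_save(entries, ARRAY_SIZE(entries), 1);

	/*
	 * GET takes a reference on the (possibly pre-existing) record;
	 * CAN_ALLOC lets the depot grow a new pool if needed.
	 */
	return stack_depot_save_flags(entries, nr, gfp,
				      STACK_DEPOT_FLAG_GET | STACK_DEPOT_FLAG_CAN_ALLOC);
}

static void untag_alloc_site(depot_stack_handle_t handle)
{
	/* Drops the reference; the last put makes the record evictable. */
	if (handle)
		stack_depot_put(handle);
}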
738 
739 void stack_depot_print(depot_stack_handle_t stack)
740 {
741         unsigned long *entries;
742         unsigned int nr_entries;
743 
744         nr_entries = stack_depot_fetch(stack, &entries);
745         if (nr_entries > 0)
746                 stack_trace_print(entries, nr_entries, 0);
747 }
748 EXPORT_SYMBOL_GPL(stack_depot_print);
749 
750 int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
751                        int spaces)
752 {
753         unsigned long *entries;
754         unsigned int nr_entries;
755 
756         nr_entries = stack_depot_fetch(handle, &entries);
757         return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries,
758                                                 spaces) : 0;
759 }
760 EXPORT_SYMBOL_GPL(stack_depot_snprint);
761 
762 depot_stack_handle_t __must_check stack_depot_set_extra_bits(
763                         depot_stack_handle_t handle, unsigned int extra_bits)
764 {
765         union handle_parts parts = { .handle = handle };
766 
767         /* Don't set extra bits on empty handles. */
768         if (!handle)
769                 return 0;
770 
771         parts.extra = extra_bits;
772         return parts.handle;
773 }
774 EXPORT_SYMBOL(stack_depot_set_extra_bits);
775 
776 unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
777 {
778         union handle_parts parts = { .handle = handle };
779 
780         return parts.extra;
781 }
782 EXPORT_SYMBOL(stack_depot_get_extra_bits);
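
The extra-bits helpers let a user stash a few bits of its own state (at most STACK_DEPOT_EXTRA_BITS, fixed at build time) inside an otherwise opaque handle, avoiding a separate field next to every stored handle. A short sketch follows; the two-bit "origin" tags are invented for illustration.

/* Hypothetical user of the extra bits; the origin encoding is invented. */
#include <linux/stackdepot.h>

#define ORIGIN_ALLOC	1	/* example tags, must fit in STACK_DEPOT_EXTRA_BITS */
#define ORIGIN_FREE	2

static depot_stack_handle_t tag_origin(depot_stack_handle_t handle, unsigned int origin)
{
	/* A zero handle is returned unchanged, so "no stack" stays distinguishable. */
	return stack_depot_set_extra_bits(handle, origin);
}

static unsigned int origin_of(depot_stack_handle_t handle)
{
	return stack_depot_get_extra_bits(handle);
}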
783 
784 static int stats_show(struct seq_file *seq, void *v)
785 {
786         /*
787          * data race ok: These are just statistics counters, and approximate
788          * statistics are ok for debugging.
789          */
790         seq_printf(seq, "pools: %d\n", data_race(pools_num));
791         for (int i = 0; i < DEPOT_COUNTER_COUNT; i++)
792                 seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i]));
793 
794         return 0;
795 }
796 DEFINE_SHOW_ATTRIBUTE(stats);
797 
798 static int depot_debugfs_init(void)
799 {
800         struct dentry *dir;
801 
802         if (stack_depot_disabled)
803                 return 0;
804 
805         dir = debugfs_create_dir("stackdepot", NULL);
806         debugfs_create_file("stats", 0444, dir, NULL, &stats_fops);
807         return 0;
808 }
809 late_initcall(depot_debugfs_init);
810 
