TOMOYO Linux Cross Reference
Linux/tools/perf/util/bpf_skel/lock_contention.bpf.c

// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2022 Google
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include <asm-generic/errno-base.h>

#include "lock_data.h"

/* for collect_lock_syms().  4096 was rejected by the verifier */
#define MAX_CPUS  1024

/* lock contention flags from include/trace/events/lock.h */
#define LCB_F_SPIN      (1U << 0)
#define LCB_F_READ      (1U << 1)
#define LCB_F_WRITE     (1U << 2)
#define LCB_F_RT        (1U << 3)
#define LCB_F_PERCPU    (1U << 4)
#define LCB_F_MUTEX     (1U << 5)

/* callstack storage */
struct {
        __uint(type, BPF_MAP_TYPE_STACK_TRACE);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u64));
        __uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");
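
/*
 * Note: the value_size above only reserves a single __u64 slot.  The
 * perf user-space loader is expected to grow it to hold a full
 * callstack before loading the object, roughly like the hedged sketch
 * below ("skel" and "max_stack" are illustrative names, not part of
 * this file):
 *
 *     bpf_map__set_value_size(skel->maps.stacks,
 *                             max_stack * sizeof(__u64));
 */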

/* maintain timestamp at the beginning of contention */
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __type(key, int);
        __type(value, struct tstamp_data);
        __uint(max_entries, MAX_ENTRIES);
} tstamp SEC(".maps");

/* maintain per-CPU timestamp at the beginning of contention */
struct {
        __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(struct tstamp_data));
        __uint(max_entries, 1);
} tstamp_cpu SEC(".maps");

/* actual lock contention statistics */
struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(struct contention_key));
        __uint(value_size, sizeof(struct contention_data));
        __uint(max_entries, MAX_ENTRIES);
} lock_stat SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(struct contention_task_data));
        __uint(max_entries, MAX_ENTRIES);
} task_data SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u64));
        __uint(value_size, sizeof(__u32));
        __uint(max_entries, MAX_ENTRIES);
} lock_syms SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
} cpu_filter SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
} task_filter SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
} type_filter SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u64));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
} addr_filter SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __uint(key_size, sizeof(__u64));
        __uint(value_size, sizeof(__u8));
        __uint(max_entries, 1);
} cgroup_filter SEC(".maps");
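
/*
 * The *_filter maps above act as allow-lists; user space is expected
 * to resize and seed them before tracing starts.  A hedged sketch of
 * how the perf side might add one CPU to cpu_filter with libbpf
 * ("skel" and "cpu" are illustrative names, not part of this file):
 *
 *     __u8 ok = 1;
 *     bpf_map__update_elem(skel->maps.cpu_filter, &cpu, sizeof(cpu),
 *                          &ok, sizeof(ok), BPF_ANY);
 */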

struct rw_semaphore___old {
        struct task_struct *owner;
} __attribute__((preserve_access_index));

struct rw_semaphore___new {
        atomic_long_t owner;
} __attribute__((preserve_access_index));

struct mm_struct___old {
        struct rw_semaphore mmap_sem;
} __attribute__((preserve_access_index));

struct mm_struct___new {
        struct rw_semaphore mmap_lock;
} __attribute__((preserve_access_index));
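
/*
 * The ___old/___new suffixes follow the libbpf CO-RE "type flavor"
 * convention: everything from the triple underscore onward is ignored
 * when matching types against kernel BTF, so both variants describe
 * the kernel's real rw_semaphore/mm_struct and the program can probe
 * at runtime which field layout the booted kernel actually has.
 */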

/* control flags */
int enabled;
int has_cpu;
int has_task;
int has_type;
int has_addr;
int has_cgroup;
int needs_callstack;
int stack_skip;
int lock_owner;

int use_cgroup_v2;
int perf_subsys_id = -1;

/* determine the key of lock stat */
int aggr_mode;

__u64 end_ts;

/* error stat */
int task_fail;
int stack_fail;
int time_fail;
int data_fail;

int task_map_full;
int data_map_full;

static inline __u64 get_current_cgroup_id(void)
{
        struct task_struct *task;
        struct cgroup *cgrp;

        if (use_cgroup_v2)
                return bpf_get_current_cgroup_id();

        task = bpf_get_current_task_btf();

        if (perf_subsys_id == -1) {
#if __has_builtin(__builtin_preserve_enum_value)
                perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
                                                     perf_event_cgrp_id);
#else
                perf_subsys_id = perf_event_cgrp_id;
#endif
        }

        cgrp = BPF_CORE_READ(task, cgroups, subsys[perf_subsys_id], cgroup);
        return BPF_CORE_READ(cgrp, kn, id);
}

static inline int can_record(u64 *ctx)
{
        if (has_cpu) {
                __u32 cpu = bpf_get_smp_processor_id();
                __u8 *ok;

                ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
                if (!ok)
                        return 0;
        }

        if (has_task) {
                __u8 *ok;
                __u32 pid = bpf_get_current_pid_tgid();

                ok = bpf_map_lookup_elem(&task_filter, &pid);
                if (!ok)
                        return 0;
        }

        if (has_type) {
                __u8 *ok;
                __u32 flags = (__u32)ctx[1];

                ok = bpf_map_lookup_elem(&type_filter, &flags);
                if (!ok)
                        return 0;
        }

        if (has_addr) {
                __u8 *ok;
                __u64 addr = ctx[0];

                ok = bpf_map_lookup_elem(&addr_filter, &addr);
                if (!ok)
                        return 0;
        }

        if (has_cgroup) {
                __u8 *ok;
                __u64 cgrp = get_current_cgroup_id();

                ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp);
                if (!ok)
                        return 0;
        }

        return 1;
}
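
/*
 * Note on the raw ctx array used above: for tp_btf programs the
 * tracepoint arguments are passed positionally, so ctx[0] is the lock
 * address and ctx[1] is the flags word, matching
 * TP_PROTO(void *lock, unsigned int flags) of the contention_begin
 * tracepoint in include/trace/events/lock.h.
 */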

static inline int update_task_data(struct task_struct *task)
{
        struct contention_task_data *p;
        int pid, err;

        err = bpf_core_read(&pid, sizeof(pid), &task->pid);
        if (err)
                return -1;

        p = bpf_map_lookup_elem(&task_data, &pid);
        if (p == NULL && !task_map_full) {
                struct contention_task_data data = {};

                BPF_CORE_READ_STR_INTO(&data.comm, task, comm);
                if (bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST) == -E2BIG)
                        task_map_full = 1;
        }

        return 0;
}

#ifndef __has_builtin
# define __has_builtin(x) 0
#endif

static inline struct task_struct *get_lock_owner(__u64 lock, __u32 flags)
{
        struct task_struct *task;
        __u64 owner = 0;

        if (flags & LCB_F_MUTEX) {
                struct mutex *mutex = (void *)lock;
                owner = BPF_CORE_READ(mutex, owner.counter);
        } else if (flags == LCB_F_READ || flags == LCB_F_WRITE) {
        /*
         * Support for the BPF_TYPE_MATCHES argument to the
         * __builtin_preserve_type_info builtin was added at some point during
         * development of clang 15 and it's what is needed for
         * bpf_core_type_matches.
         */
#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
                if (bpf_core_type_matches(struct rw_semaphore___old)) {
                        struct rw_semaphore___old *rwsem = (void *)lock;
                        owner = (unsigned long)BPF_CORE_READ(rwsem, owner);
                } else if (bpf_core_type_matches(struct rw_semaphore___new)) {
                        struct rw_semaphore___new *rwsem = (void *)lock;
                        owner = BPF_CORE_READ(rwsem, owner.counter);
                }
#else
                /* assume new struct */
                struct rw_semaphore *rwsem = (void *)lock;
                owner = BPF_CORE_READ(rwsem, owner.counter);
#endif
        }

        if (!owner)
                return NULL;

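        /*
         * The low bits of the owner word are flag bits (waiter/handoff
         * markers for mutexes, reader-owned markers for rwsems); mask
         * them off to recover the task_struct pointer.
         */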
        task = (void *)(owner & ~7UL);
        return task;
}

static inline __u32 check_lock_type(__u64 lock, __u32 flags)
{
        struct task_struct *curr;
        struct mm_struct___old *mm_old;
        struct mm_struct___new *mm_new;
        struct sighand_struct *sighand;

        switch (flags) {
        case LCB_F_READ:  /* rwsem */
        case LCB_F_WRITE:
                curr = bpf_get_current_task_btf();
                if (curr->mm == NULL)
                        break;
                mm_new = (void *)curr->mm;
                if (bpf_core_field_exists(mm_new->mmap_lock)) {
                        if (&mm_new->mmap_lock == (void *)lock)
                                return LCD_F_MMAP_LOCK;
                        break;
                }
                mm_old = (void *)curr->mm;
                if (bpf_core_field_exists(mm_old->mmap_sem)) {
                        if (&mm_old->mmap_sem == (void *)lock)
                                return LCD_F_MMAP_LOCK;
                }
                break;
        case LCB_F_SPIN:  /* spinlock */
                curr = bpf_get_current_task_btf();
                sighand = curr->sighand;

                if (sighand && &sighand->siglock == (void *)lock)
                        return LCD_F_SIGHAND_LOCK;
                break;
        default:
                break;
        }
        return 0;
}

static inline struct tstamp_data *get_tstamp_elem(__u32 flags)
{
        __u32 pid;
        struct tstamp_data *pelem;

        /* Use per-cpu array map for spinlock and rwlock */
        if (flags == (LCB_F_SPIN | LCB_F_READ) || flags == LCB_F_SPIN ||
            flags == (LCB_F_SPIN | LCB_F_WRITE)) {
                __u32 idx = 0;

                pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx);
                /* Do not update the element for nested locks */
                if (pelem && pelem->lock)
                        pelem = NULL;
                return pelem;
        }

        pid = bpf_get_current_pid_tgid();
        pelem = bpf_map_lookup_elem(&tstamp, &pid);
        /* Do not update the element for nested locks */
        if (pelem && pelem->lock)
                return NULL;

        if (pelem == NULL) {
                struct tstamp_data zero = {};

                if (bpf_map_update_elem(&tstamp, &pid, &zero, BPF_NOEXIST) < 0) {
                        __sync_fetch_and_add(&task_fail, 1);
                        return NULL;
                }

                pelem = bpf_map_lookup_elem(&tstamp, &pid);
                if (pelem == NULL) {
                        __sync_fetch_and_add(&task_fail, 1);
                        return NULL;
                }
        }
        return pelem;
}
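
/*
 * Rationale for the split above: a task spinning on a spinlock/rwlock
 * cannot sleep or migrate before it gets the lock, so a single per-CPU
 * slot is enough to hold the begin timestamp and avoids hash-map
 * insertions on that hot path.  Sleeping locks (mutex/rwsem) may
 * schedule out, so their timestamps are keyed by pid in the tstamp
 * hash map instead.
 */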

SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
        struct tstamp_data *pelem;

        if (!enabled || !can_record(ctx))
                return 0;

        pelem = get_tstamp_elem(ctx[1]);
        if (pelem == NULL)
                return 0;

        pelem->timestamp = bpf_ktime_get_ns();
        pelem->lock = (__u64)ctx[0];
        pelem->flags = (__u32)ctx[1];

        if (needs_callstack) {
                pelem->stack_id = bpf_get_stackid(ctx, &stacks,
                                                  BPF_F_FAST_STACK_CMP | stack_skip);
                if (pelem->stack_id < 0)
                        __sync_fetch_and_add(&stack_fail, 1);
        } else if (aggr_mode == LOCK_AGGR_TASK) {
                struct task_struct *task;

                if (lock_owner) {
                        task = get_lock_owner(pelem->lock, pelem->flags);

                        /* The flags field is not used anymore.  Pass the owner pid. */
                        if (task)
                                pelem->flags = BPF_CORE_READ(task, pid);
                        else
                                pelem->flags = -1U;

                } else {
                        task = bpf_get_current_task_btf();
                }

                if (task) {
                        if (update_task_data(task) < 0 && lock_owner)
                                pelem->flags = -1U;
                }
        }

        return 0;
}
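
/*
 * User-space side (hedged sketch, not part of this file): the perf tool
 * loads this object through its generated skeleton and then flips the
 * "enabled" flag to start accounting.  Names below are illustrative:
 *
 *     struct lock_contention_bpf *skel = lock_contention_bpf__open_and_load();
 *
 *     if (skel && lock_contention_bpf__attach(skel) == 0)
 *             skel->bss->enabled = 1;   // contention_begin/end start recording
 */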

SEC("tp_btf/contention_end")
int contention_end(u64 *ctx)
{
        __u32 pid = 0, idx = 0;
        struct tstamp_data *pelem;
        struct contention_key key = {};
        struct contention_data *data;
        __u64 duration;
        bool need_delete = false;

        if (!enabled)
                return 0;

        /*
         * For spinlocks and rwlocks, the begin timestamp lives in the
         * per-cpu map.  However, contention_end does not receive the flags,
         * so it cannot tell whether to read the per-cpu map or the hash map.
         *
         * Try the per-cpu map first and check whether there is an active
         * contention.  If there is, skip the hash map: a task cannot be
         * contending on a sleeping lock while it still holds spinning locks.
         */
        pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx);
        if (pelem && pelem->lock) {
                if (pelem->lock != ctx[0])
                        return 0;
        } else {
                pid = bpf_get_current_pid_tgid();
                pelem = bpf_map_lookup_elem(&tstamp, &pid);
                if (!pelem || pelem->lock != ctx[0])
                        return 0;
                need_delete = true;
        }

        duration = bpf_ktime_get_ns() - pelem->timestamp;
        if ((__s64)duration < 0) {
                pelem->lock = 0;
                if (need_delete)
                        bpf_map_delete_elem(&tstamp, &pid);
                __sync_fetch_and_add(&time_fail, 1);
                return 0;
        }

        switch (aggr_mode) {
        case LOCK_AGGR_CALLER:
                key.stack_id = pelem->stack_id;
                break;
        case LOCK_AGGR_TASK:
                if (lock_owner)
                        key.pid = pelem->flags;
                else {
                        if (!need_delete)
                                pid = bpf_get_current_pid_tgid();
                        key.pid = pid;
                }
                if (needs_callstack)
                        key.stack_id = pelem->stack_id;
                break;
        case LOCK_AGGR_ADDR:
                key.lock_addr_or_cgroup = pelem->lock;
                if (needs_callstack)
                        key.stack_id = pelem->stack_id;
                break;
        case LOCK_AGGR_CGROUP:
                key.lock_addr_or_cgroup = get_current_cgroup_id();
                break;
        default:
                /* should not happen */
                return 0;
        }

        data = bpf_map_lookup_elem(&lock_stat, &key);
        if (!data) {
                if (data_map_full) {
                        pelem->lock = 0;
                        if (need_delete)
                                bpf_map_delete_elem(&tstamp, &pid);
                        __sync_fetch_and_add(&data_fail, 1);
                        return 0;
                }

                struct contention_data first = {
                        .total_time = duration,
                        .max_time = duration,
                        .min_time = duration,
                        .count = 1,
                        .flags = pelem->flags,
                };
                int err;

                if (aggr_mode == LOCK_AGGR_ADDR)
                        first.flags |= check_lock_type(pelem->lock, pelem->flags);

                err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
                if (err < 0) {
                        if (err == -E2BIG)
                                data_map_full = 1;
                        __sync_fetch_and_add(&data_fail, 1);
                }
                pelem->lock = 0;
                if (need_delete)
                        bpf_map_delete_elem(&tstamp, &pid);
                return 0;
        }

        __sync_fetch_and_add(&data->total_time, duration);
        __sync_fetch_and_add(&data->count, 1);

        /* FIXME: need atomic operations */
        if (data->max_time < duration)
                data->max_time = duration;
        if (data->min_time > duration)
                data->min_time = duration;

        pelem->lock = 0;
        if (need_delete)
                bpf_map_delete_elem(&tstamp, &pid);
        return 0;
}
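
/*
 * Reading the results (hedged sketch, not part of this file): after
 * tracing stops, user space can walk lock_stat with the generic map
 * iteration API.  "skel", "cur" and "key" are illustrative names:
 *
 *     struct contention_key key, *cur = NULL;
 *     struct contention_data data;
 *     int fd = bpf_map__fd(skel->maps.lock_stat);
 *
 *     while (bpf_map_get_next_key(fd, cur, &key) == 0) {
 *             bpf_map_lookup_elem(fd, &key, &data);
 *             // report data.total_time, data.count, data.max_time, ...
 *             cur = &key;
 *     }
 */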

extern struct rq runqueues __ksym;

struct rq___old {
        raw_spinlock_t lock;
} __attribute__((preserve_access_index));

struct rq___new {
        raw_spinlock_t __lock;
} __attribute__((preserve_access_index));

SEC("raw_tp/bpf_test_finish")
int BPF_PROG(collect_lock_syms)
{
        __u64 lock_addr, lock_off;
        __u32 lock_flag;

        if (bpf_core_field_exists(struct rq___new, __lock))
                lock_off = offsetof(struct rq___new, __lock);
        else
                lock_off = offsetof(struct rq___old, lock);

        for (int i = 0; i < MAX_CPUS; i++) {
                struct rq *rq = bpf_per_cpu_ptr(&runqueues, i);

                if (rq == NULL)
                        break;

                lock_addr = (__u64)(void *)rq + lock_off;
                lock_flag = LOCK_CLASS_RQLOCK;
                bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
        }
        return 0;
}
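
/*
 * collect_lock_syms and end_timestamp use the bpf_test_finish raw
 * tracepoint only as a valid attach point; user space is expected to
 * run them on demand via BPF_PROG_RUN.  A hedged sketch ("skel" and
 * "opts" are illustrative names, not part of this file):
 *
 *     LIBBPF_OPTS(bpf_test_run_opts, opts);
 *
 *     bpf_prog_test_run_opts(bpf_program__fd(skel->progs.collect_lock_syms),
 *                            &opts);
 */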

SEC("raw_tp/bpf_test_finish")
int BPF_PROG(end_timestamp)
{
        end_ts = bpf_ktime_get_ns();
        return 0;
}

char LICENSE[] SEC("license") = "Dual BSD/GPL";
