TOMOYO Linux Cross Reference
Linux/arch/loongarch/kernel/perf_event.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Linux performance counter support for LoongArch.
 *
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 *
 * Derived from MIPS:
 * Copyright (C) 2010 MIPS Technologies, Inc.
 * Copyright (C) 2011 Cavium Networks, Inc.
 * Author: Deng-Cheng Zhu
 */

#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/sched/task_stack.h>

#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>

/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static unsigned long
user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
{
        unsigned long err;
        unsigned long __user *user_frame_tail;
        struct stack_frame buftail;

        user_frame_tail = (unsigned long __user *)(fp - sizeof(struct stack_frame));

        /* Also check accessibility of one struct frame_tail beyond */
        if (!access_ok(user_frame_tail, sizeof(buftail)))
                return 0;

        pagefault_disable();
        err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail));
        pagefault_enable();

        if (err || (unsigned long)user_frame_tail >= buftail.fp)
                return 0;

        perf_callchain_store(entry, buftail.ra);

        return buftail.fp;
}

void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
                         struct pt_regs *regs)
{
        unsigned long fp;

        if (perf_guest_state()) {
                /* We don't support guest os callchain now */
                return;
        }

        perf_callchain_store(entry, regs->csr_era);

        fp = regs->regs[22];

        while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf))
                fp = user_backtrace(entry, fp);
}

void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
                           struct pt_regs *regs)
{
        struct unwind_state state;
        unsigned long addr;

        for (unwind_start(&state, current, regs);
              !unwind_done(&state); unwind_next_frame(&state)) {
                addr = unwind_get_return_address(&state);
                if (!addr || perf_callchain_store(entry, addr))
                        return;
        }
}

#define LOONGARCH_MAX_HWEVENTS 32

struct cpu_hw_events {
        /* Array of events on this cpu. */
        struct perf_event       *events[LOONGARCH_MAX_HWEVENTS];

        /*
         * Set the bit (indexed by the counter number) when the counter
         * is used for an event.
         */
        unsigned long           used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)];

        /*
         * Software copy of the control register for each performance counter.
         */
        unsigned int            saved_ctrl[LOONGARCH_MAX_HWEVENTS];
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
        .saved_ctrl = {0},
};

/* The description of LoongArch performance events. */
struct loongarch_perf_event {
        unsigned int event_id;
};

static struct loongarch_perf_event raw_event;
static DEFINE_MUTEX(raw_event_mutex);

#define C(x) PERF_COUNT_HW_CACHE_##x
#define HW_OP_UNSUPPORTED               0xffffffff
#define CACHE_OP_UNSUPPORTED            0xffffffff

#define PERF_MAP_ALL_UNSUPPORTED                                        \
        [0 ... PERF_COUNT_HW_MAX - 1] = {HW_OP_UNSUPPORTED}

#define PERF_CACHE_MAP_ALL_UNSUPPORTED                                  \
[0 ... C(MAX) - 1] = {                                                  \
        [0 ... C(OP_MAX) - 1] = {                                       \
                [0 ... C(RESULT_MAX) - 1] = {CACHE_OP_UNSUPPORTED},     \
        },                                                              \
}

struct loongarch_pmu {
        u64             max_period;
        u64             valid_count;
        u64             overflow;
        const char      *name;
        unsigned int    num_counters;
        u64             (*read_counter)(unsigned int idx);
        void            (*write_counter)(unsigned int idx, u64 val);
        const struct loongarch_perf_event *(*map_raw_event)(u64 config);
        const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX];
        const struct loongarch_perf_event (*cache_event_map)
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];
};

static struct loongarch_pmu loongarch_pmu;

#define M_PERFCTL_EVENT(event)  (event & CSR_PERFCTRL_EVENT)

#define M_PERFCTL_COUNT_EVENT_WHENEVER  (CSR_PERFCTRL_PLV0 |    \
                                        CSR_PERFCTRL_PLV1 |     \
                                        CSR_PERFCTRL_PLV2 |     \
                                        CSR_PERFCTRL_PLV3 |     \
                                        CSR_PERFCTRL_IE)

#define M_PERFCTL_CONFIG_MASK           0x1f0000

static void pause_local_counters(void);
static void resume_local_counters(void);

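/*
 * Raw counter/control accessors: each counter index maps to a dedicated
 * PERFCNTRn/PERFCTRLn CSR pair.
 */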
static u64 loongarch_pmu_read_counter(unsigned int idx)
{
        u64 val = -1;

        switch (idx) {
        case 0:
                val = read_csr_perfcntr0();
                break;
        case 1:
                val = read_csr_perfcntr1();
                break;
        case 2:
                val = read_csr_perfcntr2();
                break;
        case 3:
                val = read_csr_perfcntr3();
                break;
        default:
                WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
                return 0;
        }

        return val;
}

static void loongarch_pmu_write_counter(unsigned int idx, u64 val)
{
        switch (idx) {
        case 0:
                write_csr_perfcntr0(val);
                return;
        case 1:
                write_csr_perfcntr1(val);
                return;
        case 2:
                write_csr_perfcntr2(val);
                return;
        case 3:
                write_csr_perfcntr3(val);
                return;
        default:
                WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
                return;
        }
}

static unsigned int loongarch_pmu_read_control(unsigned int idx)
{
        unsigned int val = -1;

        switch (idx) {
        case 0:
                val = read_csr_perfctrl0();
                break;
        case 1:
                val = read_csr_perfctrl1();
                break;
        case 2:
                val = read_csr_perfctrl2();
                break;
        case 3:
                val = read_csr_perfctrl3();
                break;
        default:
                WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
                return 0;
        }

        return val;
}

static void loongarch_pmu_write_control(unsigned int idx, unsigned int val)
{
        switch (idx) {
        case 0:
                write_csr_perfctrl0(val);
                return;
        case 1:
                write_csr_perfctrl1(val);
                return;
        case 2:
                write_csr_perfctrl2(val);
                return;
        case 3:
                write_csr_perfctrl3(val);
                return;
        default:
                WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
                return;
        }
}

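/* Find a free counter on this CPU and mark it as used. */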
static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
        int i;

        for (i = 0; i < loongarch_pmu.num_counters; i++) {
                if (!test_and_set_bit(i, cpuc->used_mask))
                        return i;
        }

        return -EAGAIN;
}

static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx)
{
        unsigned int cpu;
        struct perf_event *event = container_of(evt, struct perf_event, hw);
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

        /* Make sure interrupt enabled. */
        cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base) |
                (evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE;

        cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id();

        /*
         * We do not actually let the counter run. Leave it until start().
         */
        pr_debug("Enabling perf counter for CPU%d\n", cpu);
}

static void loongarch_pmu_disable_event(int idx)
{
        unsigned long flags;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

        local_irq_save(flags);
        cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) &
                ~M_PERFCTL_COUNT_EVENT_WHENEVER;
        loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]);
        local_irq_restore(flags);
}

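/*
 * Program the counter so that it overflows after the remaining sample
 * period. Returns 1 when a new period was started.
 */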
static int loongarch_pmu_event_set_period(struct perf_event *event,
                                    struct hw_perf_event *hwc,
                                    int idx)
{
        int ret = 0;
        u64 left = local64_read(&hwc->period_left);
        u64 period = hwc->sample_period;

        if (unlikely((left + period) & (1ULL << 63))) {
                /* left underflowed by more than period. */
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        } else if (unlikely((left + period) <= period)) {
                /* left underflowed by less than period. */
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (left > loongarch_pmu.max_period) {
                left = loongarch_pmu.max_period;
                local64_set(&hwc->period_left, left);
        }

        local64_set(&hwc->prev_count, loongarch_pmu.overflow - left);

        loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left);

        perf_event_update_userpage(event);

        return ret;
}

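/*
 * Fold the delta since the last read into the event count, retrying the
 * cmpxchg if prev_count changed under us (e.g. from interrupt context).
 */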
static void loongarch_pmu_event_update(struct perf_event *event,
                                 struct hw_perf_event *hwc,
                                 int idx)
{
        u64 delta;
        u64 prev_raw_count, new_raw_count;

again:
        prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = loongarch_pmu.read_counter(idx);

        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                new_raw_count) != prev_raw_count)
                goto again;

        delta = new_raw_count - prev_raw_count;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);
}

static void loongarch_pmu_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

        hwc->state = 0;

        /* Set the period for the event. */
        loongarch_pmu_event_set_period(event, hwc, hwc->idx);

        /* Enable the event. */
        loongarch_pmu_enable_event(hwc, hwc->idx);
}

static void loongarch_pmu_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (!(hwc->state & PERF_HES_STOPPED)) {
                /* We are working on a local event. */
                loongarch_pmu_disable_event(hwc->idx);
                barrier();
                loongarch_pmu_event_update(event, hwc, hwc->idx);
                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        }
}

static int loongarch_pmu_add(struct perf_event *event, int flags)
{
        int idx, err = 0;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;

        perf_pmu_disable(event->pmu);

        /* To look for a free counter for this event. */
        idx = loongarch_pmu_alloc_counter(cpuc, hwc);
        if (idx < 0) {
                err = idx;
                goto out;
        }

        /*
         * If there is an event in the counter we are going to use then
         * make sure it is disabled.
         */
        event->hw.idx = idx;
        loongarch_pmu_disable_event(idx);
        cpuc->events[idx] = event;

        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        if (flags & PERF_EF_START)
                loongarch_pmu_start(event, PERF_EF_RELOAD);

        /* Propagate our changes to the userspace mapping. */
        perf_event_update_userpage(event);

out:
        perf_pmu_enable(event->pmu);
        return err;
}

static void loongarch_pmu_del(struct perf_event *event, int flags)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

        loongarch_pmu_stop(event, PERF_EF_UPDATE);
        cpuc->events[idx] = NULL;
        clear_bit(idx, cpuc->used_mask);

        perf_event_update_userpage(event);
}

static void loongarch_pmu_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        /* Don't read disabled counters! */
        if (hwc->idx < 0)
                return;

        loongarch_pmu_event_update(event, hwc, hwc->idx);
}

static void loongarch_pmu_enable(struct pmu *pmu)
{
        resume_local_counters();
}

static void loongarch_pmu_disable(struct pmu *pmu)
{
        pause_local_counters();
}

static DEFINE_MUTEX(pmu_reserve_mutex);
static atomic_t active_events = ATOMIC_INIT(0);

static void reset_counters(void *arg);
static int __hw_perf_event_init(struct perf_event *event);

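/*
 * Drop a reference on the PMU; once the last event goes away, reset the
 * counters and release the overflow IRQ.
 */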
static void hw_perf_event_destroy(struct perf_event *event)
{
        if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
                on_each_cpu(reset_counters, NULL, 1);
                free_irq(get_percpu_irq(INT_PCOV), &loongarch_pmu);
                mutex_unlock(&pmu_reserve_mutex);
        }
}

static void handle_associated_event(struct cpu_hw_events *cpuc, int idx,
                        struct perf_sample_data *data, struct pt_regs *regs)
{
        struct perf_event *event = cpuc->events[idx];
        struct hw_perf_event *hwc = &event->hw;

        loongarch_pmu_event_update(event, hwc, idx);
        data->period = event->hw.last_period;
        if (!loongarch_pmu_event_set_period(event, hwc, idx))
                return;

        if (perf_event_overflow(event, data, regs))
                loongarch_pmu_disable_event(idx);
}

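/*
 * Counter overflow interrupt: scan the in-use counters, update those that
 * overflowed and push out the corresponding samples.
 */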
static irqreturn_t pmu_handle_irq(int irq, void *dev)
{
        int n;
        int handled = IRQ_NONE;
        uint64_t counter;
        struct pt_regs *regs;
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        /*
         * First we pause the local counters, so that when we are locked
         * here, the counters are all paused. When it gets locked due to
         * perf_disable(), the timer interrupt handler will be delayed.
         *
         * See also loongarch_pmu_start().
         */
        pause_local_counters();

        regs = get_irq_regs();

        perf_sample_data_init(&data, 0, 0);

        for (n = 0; n < loongarch_pmu.num_counters; n++) {
                if (test_bit(n, cpuc->used_mask)) {
                        counter = loongarch_pmu.read_counter(n);
                        if (counter & loongarch_pmu.overflow) {
                                handle_associated_event(cpuc, n, &data, regs);
                                handled = IRQ_HANDLED;
                        }
                }
        }

        resume_local_counters();

        /*
         * Do all the work for the pending perf events. We can do this
         * in here because the performance counter interrupt is a regular
         * interrupt, not NMI.
         */
        if (handled == IRQ_HANDLED)
                irq_work_run();

        return handled;
}

static int loongarch_pmu_event_init(struct perf_event *event)
{
        int r, irq;
        unsigned long flags;

        /* does not support taken branch sampling */
        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        switch (event->attr.type) {
        case PERF_TYPE_RAW:
        case PERF_TYPE_HARDWARE:
        case PERF_TYPE_HW_CACHE:
                break;

        default:
                /* Init it to avoid false validate_group */
                event->hw.event_base = 0xffffffff;
                return -ENOENT;
        }

        if (event->cpu >= 0 && !cpu_online(event->cpu))
                return -ENODEV;

        irq = get_percpu_irq(INT_PCOV);
        flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED;
        if (!atomic_inc_not_zero(&active_events)) {
                mutex_lock(&pmu_reserve_mutex);
                if (atomic_read(&active_events) == 0) {
                        r = request_irq(irq, pmu_handle_irq, flags, "Perf_PMU", &loongarch_pmu);
                        if (r < 0) {
                                mutex_unlock(&pmu_reserve_mutex);
                                pr_warn("PMU IRQ request failed\n");
                                return -ENODEV;
                        }
                }
                atomic_inc(&active_events);
                mutex_unlock(&pmu_reserve_mutex);
        }

        return __hw_perf_event_init(event);
}

static struct pmu pmu = {
        .pmu_enable     = loongarch_pmu_enable,
        .pmu_disable    = loongarch_pmu_disable,
        .event_init     = loongarch_pmu_event_init,
        .add            = loongarch_pmu_add,
        .del            = loongarch_pmu_del,
        .start          = loongarch_pmu_start,
        .stop           = loongarch_pmu_stop,
        .read           = loongarch_pmu_read,
};

static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev)
{
        return M_PERFCTL_EVENT(pev->event_id);
}

static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx)
{
        const struct loongarch_perf_event *pev;

        pev = &(*loongarch_pmu.general_event_map)[idx];

        if (pev->event_id == HW_OP_UNSUPPORTED)
                return ERR_PTR(-ENOENT);

        return pev;
}

static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config)
{
        unsigned int cache_type, cache_op, cache_result;
        const struct loongarch_perf_event *pev;

        cache_type = (config >> 0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return ERR_PTR(-EINVAL);

        cache_op = (config >> 8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return ERR_PTR(-EINVAL);

        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return ERR_PTR(-EINVAL);

        pev = &((*loongarch_pmu.cache_event_map)
                                        [cache_type]
                                        [cache_op]
                                        [cache_result]);

        if (pev->event_id == CACHE_OP_UNSUPPORTED)
                return ERR_PTR(-ENOENT);

        return pev;
}

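/*
 * Simulate counter allocation on a scratch cpu_hw_events to check that the
 * whole group can be scheduled onto the hardware at once.
 */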
static int validate_group(struct perf_event *event)
{
        struct cpu_hw_events fake_cpuc;
        struct perf_event *sibling, *leader = event->group_leader;

        memset(&fake_cpuc, 0, sizeof(fake_cpuc));

        if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
                return -EINVAL;

        for_each_sibling_event(sibling, leader) {
                if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
                        return -EINVAL;
        }

        if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
                return -EINVAL;

        return 0;
}

static void reset_counters(void *arg)
{
        int n;
        int counters = loongarch_pmu.num_counters;

        for (n = 0; n < counters; n++) {
                loongarch_pmu_write_control(n, 0);
                loongarch_pmu.write_counter(n, 0);
        }
}

static const struct loongarch_perf_event loongson_event_map[PERF_COUNT_HW_MAX] = {
        PERF_MAP_ALL_UNSUPPORTED,
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x00 },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01 },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x08 },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x09 },
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02 },
        [PERF_COUNT_HW_BRANCH_MISSES] = { 0x03 },
};

static const struct loongarch_perf_event loongson_cache_map
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
[C(L1D)] = {
        /*
         * Like some other architectures (e.g. ARM), the performance
         * counters don't differentiate between read and write
         * accesses/misses, so this isn't strictly correct, but it's the
         * best we can do. Writes and reads get combined.
         */
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)]      = { 0x8 },
                [C(RESULT_MISS)]        = { 0x9 },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)]      = { 0x8 },
                [C(RESULT_MISS)]        = { 0x9 },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)]      = { 0xaa },
                [C(RESULT_MISS)]        = { 0xa9 },
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)]      = { 0x6 },
                [C(RESULT_MISS)]        = { 0x7 },
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)]      = { 0xc },
                [C(RESULT_MISS)]        = { 0xd },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)]      = { 0xc },
                [C(RESULT_MISS)]        = { 0xd },
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_MISS)]    = { 0x3b },
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)]      = { 0x4 },
                [C(RESULT_MISS)]        = { 0x3c },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)]      = { 0x4 },
                [C(RESULT_MISS)]        = { 0x3c },
        },
},
[C(BPU)] = {
        /* Using the same code for *HW_BRANCH* */
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)]  = { 0x02 },
                [C(RESULT_MISS)]    = { 0x03 },
        },
},
};

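/*
 * Translate a generic perf event into a LoongArch hardware event and set up
 * the privilege-level filter and the default sample period.
 */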
static int __hw_perf_event_init(struct perf_event *event)
{
        int err;
        struct hw_perf_event *hwc = &event->hw;
        struct perf_event_attr *attr = &event->attr;
        const struct loongarch_perf_event *pev;

        /* Returning LoongArch event descriptor for generic perf event. */
        if (PERF_TYPE_HARDWARE == event->attr.type) {
                if (event->attr.config >= PERF_COUNT_HW_MAX)
                        return -EINVAL;
                pev = loongarch_pmu_map_general_event(event->attr.config);
        } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
                pev = loongarch_pmu_map_cache_event(event->attr.config);
        } else if (PERF_TYPE_RAW == event->attr.type) {
                /* We are working on the global raw event. */
                mutex_lock(&raw_event_mutex);
                pev = loongarch_pmu.map_raw_event(event->attr.config);
        } else {
                /* The event type is not (yet) supported. */
                return -EOPNOTSUPP;
        }

        if (IS_ERR(pev)) {
                if (PERF_TYPE_RAW == event->attr.type)
                        mutex_unlock(&raw_event_mutex);
                return PTR_ERR(pev);
        }

        /*
         * We allow max flexibility on how each individual counter shared
         * by the single CPU operates (the mode exclusion and the range).
         */
        hwc->config_base = CSR_PERFCTRL_IE;

        hwc->event_base = loongarch_pmu_perf_event_encode(pev);
        if (PERF_TYPE_RAW == event->attr.type)
                mutex_unlock(&raw_event_mutex);

        if (!attr->exclude_user) {
                hwc->config_base |= CSR_PERFCTRL_PLV3;
                hwc->config_base |= CSR_PERFCTRL_PLV2;
        }
        if (!attr->exclude_kernel) {
                hwc->config_base |= CSR_PERFCTRL_PLV0;
        }
        if (!attr->exclude_hv) {
                hwc->config_base |= CSR_PERFCTRL_PLV1;
        }

        hwc->config_base &= M_PERFCTL_CONFIG_MASK;
        /*
         * The event can belong to another cpu. We do not assign a local
         * counter for it for now.
         */
        hwc->idx = -1;
        hwc->config = 0;

        if (!hwc->sample_period) {
                hwc->sample_period  = loongarch_pmu.max_period;
                hwc->last_period    = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
        }

        err = 0;
        if (event->group_leader != event)
                err = validate_group(event);

        event->destroy = hw_perf_event_destroy;

        if (err)
                event->destroy(event);

        return err;
}

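/*
 * Stop all counters on the local CPU, saving their control registers so
 * that resume_local_counters() can restart them.
 */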
static void pause_local_counters(void)
{
        unsigned long flags;
        int ctr = loongarch_pmu.num_counters;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        local_irq_save(flags);
        do {
                ctr--;
                cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr);
                loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] &
                                         ~M_PERFCTL_COUNT_EVENT_WHENEVER);
        } while (ctr > 0);
        local_irq_restore(flags);
}

static void resume_local_counters(void)
{
        int ctr = loongarch_pmu.num_counters;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        do {
                ctr--;
                loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]);
        } while (ctr > 0);
}

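/* Raw events: the CSR_PERFCTRL_EVENT field of attr.config selects the hardware event code directly. */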
static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config)
{
        raw_event.event_id = M_PERFCTL_EVENT(config);

        return &raw_event;
}

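/*
 * Probe the number of hardware counters from CPUCFG6, reset them on every
 * CPU and register the PMU with the perf core.
 */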
static int __init init_hw_perf_events(void)
{
        int counters;

        if (!cpu_has_pmp)
                return -ENODEV;

        pr_info("Performance counters: ");
        counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1;

        loongarch_pmu.num_counters = counters;
        loongarch_pmu.max_period = (1ULL << 63) - 1;
        loongarch_pmu.valid_count = (1ULL << 63) - 1;
        loongarch_pmu.overflow = 1ULL << 63;
        loongarch_pmu.name = "loongarch/loongson64";
        loongarch_pmu.read_counter = loongarch_pmu_read_counter;
        loongarch_pmu.write_counter = loongarch_pmu_write_counter;
        loongarch_pmu.map_raw_event = loongarch_pmu_map_raw_event;
        loongarch_pmu.general_event_map = &loongson_event_map;
        loongarch_pmu.cache_event_map = &loongson_cache_map;

        on_each_cpu(reset_counters, NULL, 1);

        pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n",
                        loongarch_pmu.name, counters, 64);

        perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);

        return 0;
}
pure_initcall(init_hw_perf_events);
