
TOMOYO Linux Cross Reference
Linux/arch/x86/events/amd/uncore.c

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Copyright (C) 2013 Advanced Micro Devices, Inc.
  4  *
  5  * Author: Jacob Shin <jacob.shin@amd.com>
  6  */
  7 
  8 #include <linux/perf_event.h>
  9 #include <linux/percpu.h>
 10 #include <linux/types.h>
 11 #include <linux/slab.h>
 12 #include <linux/init.h>
 13 #include <linux/cpu.h>
 14 #include <linux/cpumask.h>
 15 #include <linux/cpufeature.h>
 16 #include <linux/smp.h>
 17 
 18 #include <asm/perf_event.h>
 19 #include <asm/msr.h>
 20 
 21 #define NUM_COUNTERS_NB         4
 22 #define NUM_COUNTERS_L2         4
 23 #define NUM_COUNTERS_L3         6
 24 
 25 #define RDPMC_BASE_NB           6
 26 #define RDPMC_BASE_LLC          10
 27 
 28 #define COUNTER_SHIFT           16
 29 #define UNCORE_NAME_LEN         16
 30 #define UNCORE_GROUP_MAX        256
 31 
 32 #undef pr_fmt
 33 #define pr_fmt(fmt)     "amd_uncore: " fmt
 34 
 35 static int pmu_version;
 36 
 37 struct amd_uncore_ctx {
 38         int refcnt;
 39         int cpu;
 40         struct perf_event **events;
 41         struct hlist_node node;
 42 };
 43 
 44 struct amd_uncore_pmu {
 45         char name[UNCORE_NAME_LEN];
 46         int num_counters;
 47         int rdpmc_base;
 48         u32 msr_base;
 49         int group;
 50         cpumask_t active_mask;
 51         struct pmu pmu;
 52         struct amd_uncore_ctx * __percpu *ctx;
 53 };
 54 
 55 enum {
 56         UNCORE_TYPE_DF,
 57         UNCORE_TYPE_L3,
 58         UNCORE_TYPE_UMC,
 59 
 60         UNCORE_TYPE_MAX
 61 };
 62 
 63 union amd_uncore_info {
 64         struct {
 65                 u64     aux_data:32;    /* auxiliary data */
 66                 u64     num_pmcs:8;     /* number of counters */
 67                 u64     gid:8;          /* group id */
 68                 u64     cid:8;          /* context id */
 69         } split;
 70         u64             full;
 71 };
 72 
 73 struct amd_uncore {
 74         union amd_uncore_info  __percpu *info;
 75         struct amd_uncore_pmu *pmus;
 76         unsigned int num_pmus;
 77         bool init_done;
 78         void (*scan)(struct amd_uncore *uncore, unsigned int cpu);
 79         int  (*init)(struct amd_uncore *uncore, unsigned int cpu);
 80         void (*move)(struct amd_uncore *uncore, unsigned int cpu);
 81         void (*free)(struct amd_uncore *uncore, unsigned int cpu);
 82 };
 83 
 84 static struct amd_uncore uncores[UNCORE_TYPE_MAX];
 85 
 86 static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
 87 {
 88         return container_of(event->pmu, struct amd_uncore_pmu, pmu);
 89 }
 90 
 91 static void amd_uncore_read(struct perf_event *event)
 92 {
 93         struct hw_perf_event *hwc = &event->hw;
 94         u64 prev, new;
 95         s64 delta;
 96 
 97         /*
 98          * since we do not enable counter overflow interrupts,
 99          * we do not have to worry about prev_count changing on us
100          */
101 
102         prev = local64_read(&hwc->prev_count);
103 
104         /*
105          * Some uncore PMUs do not have RDPMC assignments. In such cases,
106          * read counts directly from the corresponding PERF_CTR.
107          */
108         if (hwc->event_base_rdpmc < 0)
109                 rdmsrl(hwc->event_base, new);
110         else
111                 rdpmcl(hwc->event_base_rdpmc, new);
112 
113         local64_set(&hwc->prev_count, new);
114         delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
115         delta >>= COUNTER_SHIFT;
116         local64_add(delta, &event->count);
117 }
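
The delta computation above works because the hardware counters are effectively 48 bits wide: shifting both the previous and new values up by COUNTER_SHIFT (16) before subtracting sign-extends them in a 64-bit register, so a counter that wraps past 2^48 still yields the correct small positive delta. A minimal standalone sketch of the same arithmetic, with illustrative values only (not part of the driver):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t prev = 0x0000ffffffffffffULL;	/* just below the 48-bit wrap */
	uint64_t new  = 0x0000000000000004ULL;	/* value read after wrapping  */
	int64_t delta;

	/* Same shift trick as amd_uncore_read(): sign-extend 48-bit values. */
	delta = (int64_t)(new << 16) - (int64_t)(prev << 16);
	delta >>= 16;

	printf("delta = %lld\n", (long long)delta);	/* prints 5 */
	return 0;
}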
118 
119 static void amd_uncore_start(struct perf_event *event, int flags)
120 {
121         struct hw_perf_event *hwc = &event->hw;
122 
123         if (flags & PERF_EF_RELOAD)
124                 wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
125 
126         hwc->state = 0;
127         wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
128         perf_event_update_userpage(event);
129 }
130 
131 static void amd_uncore_stop(struct perf_event *event, int flags)
132 {
133         struct hw_perf_event *hwc = &event->hw;
134 
135         wrmsrl(hwc->config_base, hwc->config);
136         hwc->state |= PERF_HES_STOPPED;
137 
138         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
139                 event->pmu->read(event);
140                 hwc->state |= PERF_HES_UPTODATE;
141         }
142 }
143 
144 static int amd_uncore_add(struct perf_event *event, int flags)
145 {
146         int i;
147         struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
148         struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
149         struct hw_perf_event *hwc = &event->hw;
150 
151         /* are we already assigned? */
152         if (hwc->idx != -1 && ctx->events[hwc->idx] == event)
153                 goto out;
154 
155         for (i = 0; i < pmu->num_counters; i++) {
156                 if (ctx->events[i] == event) {
157                         hwc->idx = i;
158                         goto out;
159                 }
160         }
161 
162         /* if not, take the first available counter */
163         hwc->idx = -1;
164         for (i = 0; i < pmu->num_counters; i++) {
165                 struct perf_event *tmp = NULL;
166 
167                 if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
168                         hwc->idx = i;
169                         break;
170                 }
171         }
172 
173 out:
174         if (hwc->idx == -1)
175                 return -EBUSY;
176 
177         hwc->config_base = pmu->msr_base + (2 * hwc->idx);
178         hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx);
179         hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx;
180         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
181 
182         if (pmu->rdpmc_base < 0)
183                 hwc->event_base_rdpmc = -1;
184 
185         if (flags & PERF_EF_START)
186                 event->pmu->start(event, PERF_EF_RELOAD);
187 
188         return 0;
189 }
190 
191 static void amd_uncore_del(struct perf_event *event, int flags)
192 {
193         int i;
194         struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
195         struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
196         struct hw_perf_event *hwc = &event->hw;
197 
198         event->pmu->stop(event, PERF_EF_UPDATE);
199 
200         for (i = 0; i < pmu->num_counters; i++) {
201                 struct perf_event *tmp = event;
202 
203                 if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
204                         break;
205         }
206 
207         hwc->idx = -1;
208 }
209 
210 static int amd_uncore_event_init(struct perf_event *event)
211 {
212         struct amd_uncore_pmu *pmu;
213         struct amd_uncore_ctx *ctx;
214         struct hw_perf_event *hwc = &event->hw;
215 
216         if (event->attr.type != event->pmu->type)
217                 return -ENOENT;
218 
219         if (event->cpu < 0)
220                 return -EINVAL;
221 
222         pmu = event_to_amd_uncore_pmu(event);
223         ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
224         if (!ctx)
225                 return -ENODEV;
226 
227         /*
228          * NB and Last level cache counters (MSRs) are shared across all cores
229          * that share the same NB / Last level cache.  On family 16h and below,
 230          * interrupts can be directed to a single target core; however, event
231          * counts generated by processes running on other cores cannot be masked
232          * out. So we do not support sampling and per-thread events via
233          * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts:
234          */
235         hwc->config = event->attr.config;
236         hwc->idx = -1;
237 
238         /*
 239          * since requests can come in on any of the shared cores, we will remap
240          * to a single common cpu.
241          */
242         event->cpu = ctx->cpu;
243 
244         return 0;
245 }
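
Because amd_uncore_event_init() rejects per-task events (event->cpu must be valid) and all of these PMUs advertise PERF_PMU_CAP_NO_INTERRUPT, userspace consumes them as system-wide, counting-only events pinned to a CPU; the kernel then remaps the event to the context owner CPU as done above. A minimal sketch of how a tool might open such an event through perf_event_open(2), assuming the family 17h+ "amd_l3" PMU is present and using a placeholder config encoding:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int type, fd;
	FILE *f;

	/* Dynamic PMU types registered via perf_pmu_register() show up here. */
	f = fopen("/sys/bus/event_source/devices/amd_l3/type", "r");
	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0xff04;	/* placeholder event/umask; see format attrs */

	/* Uncore events: pid == -1 (system-wide), cpu >= 0, counting only. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("count: %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}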
246 
247 static umode_t
248 amd_f17h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
249 {
250         return boot_cpu_data.x86 >= 0x17 && boot_cpu_data.x86 < 0x19 ?
251                attr->mode : 0;
252 }
253 
254 static umode_t
255 amd_f19h_uncore_is_visible(struct kobject *kobj, struct attribute *attr, int i)
256 {
257         return boot_cpu_data.x86 >= 0x19 ? attr->mode : 0;
258 }
259 
260 static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
261                                             struct device_attribute *attr,
262                                             char *buf)
263 {
264         struct pmu *ptr = dev_get_drvdata(dev);
265         struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu);
266 
267         return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask);
268 }
269 static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
270 
271 static struct attribute *amd_uncore_attrs[] = {
272         &dev_attr_cpumask.attr,
273         NULL,
274 };
275 
276 static struct attribute_group amd_uncore_attr_group = {
277         .attrs = amd_uncore_attrs,
278 };
279 
280 #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)                 \
281 static ssize_t __uncore_##_var##_show(struct device *dev,               \
282                                 struct device_attribute *attr,          \
283                                 char *page)                             \
284 {                                                                       \
285         BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);                     \
286         return sprintf(page, _format "\n");                             \
287 }                                                                       \
288 static struct device_attribute format_attr_##_var =                     \
289         __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
290 
291 DEFINE_UNCORE_FORMAT_ATTR(event12,      event,          "config:0-7,32-35");
292 DEFINE_UNCORE_FORMAT_ATTR(event14,      event,          "config:0-7,32-35,59-60"); /* F17h+ DF */
293 DEFINE_UNCORE_FORMAT_ATTR(event14v2,    event,          "config:0-7,32-37");       /* PerfMonV2 DF */
294 DEFINE_UNCORE_FORMAT_ATTR(event8,       event,          "config:0-7");             /* F17h+ L3, PerfMonV2 UMC */
295 DEFINE_UNCORE_FORMAT_ATTR(umask8,       umask,          "config:8-15");
296 DEFINE_UNCORE_FORMAT_ATTR(umask12,      umask,          "config:8-15,24-27");      /* PerfMonV2 DF */
297 DEFINE_UNCORE_FORMAT_ATTR(coreid,       coreid,         "config:42-44");           /* F19h L3 */
298 DEFINE_UNCORE_FORMAT_ATTR(slicemask,    slicemask,      "config:48-51");           /* F17h L3 */
299 DEFINE_UNCORE_FORMAT_ATTR(threadmask8,  threadmask,     "config:56-63");           /* F17h L3 */
300 DEFINE_UNCORE_FORMAT_ATTR(threadmask2,  threadmask,     "config:56-57");           /* F19h L3 */
301 DEFINE_UNCORE_FORMAT_ATTR(enallslices,  enallslices,    "config:46");              /* F19h L3 */
302 DEFINE_UNCORE_FORMAT_ATTR(enallcores,   enallcores,     "config:47");              /* F19h L3 */
303 DEFINE_UNCORE_FORMAT_ATTR(sliceid,      sliceid,        "config:48-50");           /* F19h L3 */
304 DEFINE_UNCORE_FORMAT_ATTR(rdwrmask,     rdwrmask,       "config:8-9");             /* PerfMonV2 UMC */
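
The format attributes above tell perf how a raw event encoding maps onto perf_event_attr.config bits. As an illustration of the PerfMonV2 DF layout (event in config bits 0-7 and 32-37, umask in bits 8-15 and 24-27), a userspace tool could pack a raw config as sketched below; this is illustrative only, not part of the driver:

#include <stdint.h>

/* Pack per the "event14v2" and "umask12" formats defined above. */
static inline uint64_t amd_df_v2_raw_config(unsigned int event, unsigned int umask)
{
	return ((uint64_t)(event & 0xff)) |			/* config:0-7   */
	       (((uint64_t)(event >> 8) & 0x3f) << 32) |	/* config:32-37 */
	       (((uint64_t)(umask & 0xff)) << 8) |		/* config:8-15  */
	       (((uint64_t)(umask >> 8) & 0xf) << 24);		/* config:24-27 */
}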
305 
306 /* Common DF and NB attributes */
307 static struct attribute *amd_uncore_df_format_attr[] = {
308         &format_attr_event12.attr,      /* event */
309         &format_attr_umask8.attr,       /* umask */
310         NULL,
311 };
312 
313 /* Common L2 and L3 attributes */
314 static struct attribute *amd_uncore_l3_format_attr[] = {
315         &format_attr_event12.attr,      /* event */
316         &format_attr_umask8.attr,       /* umask */
317         NULL,                           /* threadmask */
318         NULL,
319 };
320 
321 /* Common UMC attributes */
322 static struct attribute *amd_uncore_umc_format_attr[] = {
323         &format_attr_event8.attr,       /* event */
324         &format_attr_rdwrmask.attr,     /* rdwrmask */
325         NULL,
326 };
327 
328 /* F17h unique L3 attributes */
329 static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
330         &format_attr_slicemask.attr,    /* slicemask */
331         NULL,
332 };
333 
334 /* F19h unique L3 attributes */
335 static struct attribute *amd_f19h_uncore_l3_format_attr[] = {
336         &format_attr_coreid.attr,       /* coreid */
337         &format_attr_enallslices.attr,  /* enallslices */
338         &format_attr_enallcores.attr,   /* enallcores */
339         &format_attr_sliceid.attr,      /* sliceid */
340         NULL,
341 };
342 
343 static struct attribute_group amd_uncore_df_format_group = {
344         .name = "format",
345         .attrs = amd_uncore_df_format_attr,
346 };
347 
348 static struct attribute_group amd_uncore_l3_format_group = {
349         .name = "format",
350         .attrs = amd_uncore_l3_format_attr,
351 };
352 
353 static struct attribute_group amd_f17h_uncore_l3_format_group = {
354         .name = "format",
355         .attrs = amd_f17h_uncore_l3_format_attr,
356         .is_visible = amd_f17h_uncore_is_visible,
357 };
358 
359 static struct attribute_group amd_f19h_uncore_l3_format_group = {
360         .name = "format",
361         .attrs = amd_f19h_uncore_l3_format_attr,
362         .is_visible = amd_f19h_uncore_is_visible,
363 };
364 
365 static struct attribute_group amd_uncore_umc_format_group = {
366         .name = "format",
367         .attrs = amd_uncore_umc_format_attr,
368 };
369 
370 static const struct attribute_group *amd_uncore_df_attr_groups[] = {
371         &amd_uncore_attr_group,
372         &amd_uncore_df_format_group,
373         NULL,
374 };
375 
376 static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
377         &amd_uncore_attr_group,
378         &amd_uncore_l3_format_group,
379         NULL,
380 };
381 
382 static const struct attribute_group *amd_uncore_l3_attr_update[] = {
383         &amd_f17h_uncore_l3_format_group,
384         &amd_f19h_uncore_l3_format_group,
385         NULL,
386 };
387 
388 static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
389         &amd_uncore_attr_group,
390         &amd_uncore_umc_format_group,
391         NULL,
392 };
393 
394 static __always_inline
395 int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
396 {
397         union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
398         return info->split.cid;
399 }
400 
401 static __always_inline
402 int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu)
403 {
404         union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
405         return info->split.gid;
406 }
407 
408 static __always_inline
409 int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu)
410 {
411         union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu);
412         return info->split.num_pmcs;
413 }
414 
415 static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu)
416 {
417         struct amd_uncore_pmu *pmu;
418         struct amd_uncore_ctx *ctx;
419         int i;
420 
421         if (!uncore->init_done)
422                 return;
423 
424         for (i = 0; i < uncore->num_pmus; i++) {
425                 pmu = &uncore->pmus[i];
426                 ctx = *per_cpu_ptr(pmu->ctx, cpu);
427                 if (!ctx)
428                         continue;
429 
430                 if (cpu == ctx->cpu)
431                         cpumask_clear_cpu(cpu, &pmu->active_mask);
432 
433                 if (!--ctx->refcnt) {
434                         kfree(ctx->events);
435                         kfree(ctx);
436                 }
437 
438                 *per_cpu_ptr(pmu->ctx, cpu) = NULL;
439         }
440 }
441 
442 static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
443 {
444         struct amd_uncore_ctx *curr, *prev;
445         struct amd_uncore_pmu *pmu;
446         int node, cid, gid, i, j;
447 
448         if (!uncore->init_done || !uncore->num_pmus)
449                 return 0;
450 
451         cid = amd_uncore_ctx_cid(uncore, cpu);
452         gid = amd_uncore_ctx_gid(uncore, cpu);
453 
454         for (i = 0; i < uncore->num_pmus; i++) {
455                 pmu = &uncore->pmus[i];
456                 *per_cpu_ptr(pmu->ctx, cpu) = NULL;
457                 curr = NULL;
458 
459                 /* Check for group exclusivity */
460                 if (gid != pmu->group)
461                         continue;
462 
463                 /* Find a sibling context */
464                 for_each_online_cpu(j) {
465                         if (cpu == j)
466                                 continue;
467 
468                         prev = *per_cpu_ptr(pmu->ctx, j);
469                         if (!prev)
470                                 continue;
471 
472                         if (cid == amd_uncore_ctx_cid(uncore, j)) {
473                                 curr = prev;
474                                 break;
475                         }
476                 }
477 
478                 /* Allocate context if sibling does not exist */
479                 if (!curr) {
480                         node = cpu_to_node(cpu);
481                         curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node);
482                         if (!curr)
483                                 goto fail;
484 
485                         curr->cpu = cpu;
486                         curr->events = kzalloc_node(sizeof(*curr->events) *
487                                                     pmu->num_counters,
488                                                     GFP_KERNEL, node);
489                         if (!curr->events) {
490                                 kfree(curr);
491                                 goto fail;
492                         }
493 
494                         cpumask_set_cpu(cpu, &pmu->active_mask);
495                 }
496 
497                 curr->refcnt++;
498                 *per_cpu_ptr(pmu->ctx, cpu) = curr;
499         }
500 
501         return 0;
502 
503 fail:
504         amd_uncore_ctx_free(uncore, cpu);
505 
506         return -ENOMEM;
507 }
508 
509 static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu)
510 {
511         struct amd_uncore_ctx *curr, *next;
512         struct amd_uncore_pmu *pmu;
513         int i, j;
514 
515         if (!uncore->init_done)
516                 return;
517 
518         for (i = 0; i < uncore->num_pmus; i++) {
519                 pmu = &uncore->pmus[i];
520                 curr = *per_cpu_ptr(pmu->ctx, cpu);
521                 if (!curr)
522                         continue;
523 
524                 /* Migrate to a shared sibling if possible */
525                 for_each_online_cpu(j) {
526                         next = *per_cpu_ptr(pmu->ctx, j);
527                         if (!next || cpu == j)
528                                 continue;
529 
530                         if (curr == next) {
531                                 perf_pmu_migrate_context(&pmu->pmu, cpu, j);
532                                 cpumask_clear_cpu(cpu, &pmu->active_mask);
533                                 cpumask_set_cpu(j, &pmu->active_mask);
534                                 next->cpu = j;
535                                 break;
536                         }
537                 }
538         }
539 }
540 
541 static int amd_uncore_cpu_starting(unsigned int cpu)
542 {
543         struct amd_uncore *uncore;
544         int i;
545 
546         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
547                 uncore = &uncores[i];
548                 uncore->scan(uncore, cpu);
549         }
550 
551         return 0;
552 }
553 
554 static int amd_uncore_cpu_online(unsigned int cpu)
555 {
556         struct amd_uncore *uncore;
557         int i;
558 
559         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
560                 uncore = &uncores[i];
561                 if (uncore->init(uncore, cpu))
562                         break;
563         }
564 
565         return 0;
566 }
567 
568 static int amd_uncore_cpu_down_prepare(unsigned int cpu)
569 {
570         struct amd_uncore *uncore;
571         int i;
572 
573         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
574                 uncore = &uncores[i];
575                 uncore->move(uncore, cpu);
576         }
577 
578         return 0;
579 }
580 
581 static int amd_uncore_cpu_dead(unsigned int cpu)
582 {
583         struct amd_uncore *uncore;
584         int i;
585 
586         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
587                 uncore = &uncores[i];
588                 uncore->free(uncore, cpu);
589         }
590 
591         return 0;
592 }
593 
594 static int amd_uncore_df_event_init(struct perf_event *event)
595 {
596         struct hw_perf_event *hwc = &event->hw;
597         int ret = amd_uncore_event_init(event);
598 
599         if (ret || pmu_version < 2)
600                 return ret;
601 
602         hwc->config = event->attr.config &
603                       (pmu_version >= 2 ? AMD64_PERFMON_V2_RAW_EVENT_MASK_NB :
604                                           AMD64_RAW_EVENT_MASK_NB);
605 
606         return 0;
607 }
608 
609 static int amd_uncore_df_add(struct perf_event *event, int flags)
610 {
611         int ret = amd_uncore_add(event, flags & ~PERF_EF_START);
612         struct hw_perf_event *hwc = &event->hw;
613 
614         if (ret)
615                 return ret;
616 
617         /*
618          * The first four DF counters are accessible via RDPMC index 6 to 9
619          * followed by the L3 counters from index 10 to 15. For processors
620          * with more than four DF counters, the DF RDPMC assignments become
621          * discontiguous as the additional counters are accessible starting
622          * from index 16.
623          */
624         if (hwc->idx >= NUM_COUNTERS_NB)
625                 hwc->event_base_rdpmc += NUM_COUNTERS_L3;
626 
627         /* Delayed start after rdpmc base update */
628         if (flags & PERF_EF_START)
629                 amd_uncore_start(event, PERF_EF_RELOAD);
630 
631         return 0;
632 }
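
The adjustment above implements the layout described in the comment: DF counters 0-3 use RDPMC indices 6-9, the six L3 counters occupy 10-15, and any further DF counters continue from 16. The same mapping expressed as a standalone helper, a sketch built on the constants defined at the top of this file:

/* DF counter index -> RDPMC index, per the layout described above. */
static int amd_df_rdpmc_index(int idx)
{
	int base = RDPMC_BASE_NB + idx;		/* counters 0-3 -> 6-9 */

	if (idx >= NUM_COUNTERS_NB)		/* counters 4+ skip the L3 range */
		base += NUM_COUNTERS_L3;	/* -> 16, 17, ... */

	return base;
}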
633 
634 static
635 void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
636 {
637         union cpuid_0x80000022_ebx ebx;
638         union amd_uncore_info info;
639 
640         if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB))
641                 return;
642 
643         info.split.aux_data = 0;
644         info.split.num_pmcs = NUM_COUNTERS_NB;
645         info.split.gid = 0;
646         info.split.cid = topology_logical_package_id(cpu);
647 
648         if (pmu_version >= 2) {
649                 ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
650                 info.split.num_pmcs = ebx.split.num_df_pmc;
651         }
652 
653         *per_cpu_ptr(uncore->info, cpu) = info;
654 }
655 
656 static
657 int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
658 {
659         struct attribute **df_attr = amd_uncore_df_format_attr;
660         struct amd_uncore_pmu *pmu;
661         int num_counters;
662 
663         /* Run just once */
664         if (uncore->init_done)
665                 return amd_uncore_ctx_init(uncore, cpu);
666 
667         num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
668         if (!num_counters)
669                 goto done;
670 
671         /* No grouping, single instance for a system */
672         uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
673         if (!uncore->pmus)
674                 goto done;
675 
676         /*
677          * For Family 17h and above, the Northbridge counters are repurposed
678          * as Data Fabric counters. The PMUs are exported based on family as
679          * either NB or DF.
680          */
681         pmu = &uncore->pmus[0];
682         strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
683                 sizeof(pmu->name));
684         pmu->num_counters = num_counters;
685         pmu->msr_base = MSR_F15H_NB_PERF_CTL;
686         pmu->rdpmc_base = RDPMC_BASE_NB;
687         pmu->group = amd_uncore_ctx_gid(uncore, cpu);
688 
689         if (pmu_version >= 2) {
690                 *df_attr++ = &format_attr_event14v2.attr;
691                 *df_attr++ = &format_attr_umask12.attr;
692         } else if (boot_cpu_data.x86 >= 0x17) {
693                 *df_attr = &format_attr_event14.attr;
694         }
695 
696         pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
697         if (!pmu->ctx)
698                 goto done;
699 
700         pmu->pmu = (struct pmu) {
701                 .task_ctx_nr    = perf_invalid_context,
702                 .attr_groups    = amd_uncore_df_attr_groups,
703                 .name           = pmu->name,
704                 .event_init     = amd_uncore_df_event_init,
705                 .add            = amd_uncore_df_add,
706                 .del            = amd_uncore_del,
707                 .start          = amd_uncore_start,
708                 .stop           = amd_uncore_stop,
709                 .read           = amd_uncore_read,
710                 .capabilities   = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
711                 .module         = THIS_MODULE,
712         };
713 
714         if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
715                 free_percpu(pmu->ctx);
716                 pmu->ctx = NULL;
717                 goto done;
718         }
719 
720         pr_info("%d %s%s counters detected\n", pmu->num_counters,
721                 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?  "HYGON " : "",
722                 pmu->pmu.name);
723 
724         uncore->num_pmus = 1;
725 
726 done:
727         uncore->init_done = true;
728 
729         return amd_uncore_ctx_init(uncore, cpu);
730 }
731 
732 static int amd_uncore_l3_event_init(struct perf_event *event)
733 {
734         int ret = amd_uncore_event_init(event);
735         struct hw_perf_event *hwc = &event->hw;
736         u64 config = event->attr.config;
737         u64 mask;
738 
739         hwc->config = config & AMD64_RAW_EVENT_MASK_NB;
740 
741         /*
742          * SliceMask and ThreadMask need to be set for certain L3 events.
743          * For other events, the two fields do not affect the count.
744          */
745         if (ret || boot_cpu_data.x86 < 0x17)
746                 return ret;
747 
748         mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
749                          AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
750                          AMD64_L3_COREID_MASK);
751 
752         if (boot_cpu_data.x86 <= 0x18)
753                 mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
754                        ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);
755 
756         /*
757          * If the user doesn't specify a ThreadMask, they're not trying to
758          * count core 0, so we enable all cores & threads.
759          * We'll also assume that they want to count slice 0 if they specify
760          * a ThreadMask and leave SliceId and EnAllSlices unpopulated.
761          */
762         else if (!(config & AMD64_L3_F19H_THREAD_MASK))
763                 mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
764                        AMD64_L3_EN_ALL_CORES;
765 
766         hwc->config |= mask;
767 
768         return 0;
769 }
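
As a worked example of the masking above: on family 19h, an event opened with only event=0x04 and umask=0xff (no ThreadMask bits set) takes the final else branch, so EnAllSlices (bit 46), EnAllCores (bit 47) and the two ThreadMask bits (56-57) are ORed into hwc->config. A sketch of that arithmetic with illustrative values; the macro names below are local to the example, not the kernel's AMD64_L3_* definitions:

#include <stdint.h>

#define L3_EXAMPLE_EVENT	0x04ULL			/* config:0-7   */
#define L3_EXAMPLE_UMASK	(0xffULL << 8)		/* config:8-15  */
#define L3_EN_ALL_SLICES	(1ULL << 46)		/* config:46    */
#define L3_EN_ALL_CORES		(1ULL << 47)		/* config:47    */
#define L3_F19H_THREADMASK	(3ULL << 56)		/* config:56-57 */

static const uint64_t l3_example_config =
	L3_EXAMPLE_EVENT | L3_EXAMPLE_UMASK | L3_EN_ALL_SLICES |
	L3_EN_ALL_CORES | L3_F19H_THREADMASK;	/* == 0x0300c0000000ff04 */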
770 
771 static
772 void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
773 {
774         union amd_uncore_info info;
775 
776         if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC))
777                 return;
778 
779         info.split.aux_data = 0;
780         info.split.num_pmcs = NUM_COUNTERS_L2;
781         info.split.gid = 0;
782         info.split.cid = per_cpu_llc_id(cpu);
783 
784         if (boot_cpu_data.x86 >= 0x17)
785                 info.split.num_pmcs = NUM_COUNTERS_L3;
786 
787         *per_cpu_ptr(uncore->info, cpu) = info;
788 }
789 
790 static
791 int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
792 {
793         struct attribute **l3_attr = amd_uncore_l3_format_attr;
794         struct amd_uncore_pmu *pmu;
795         int num_counters;
796 
797         /* Run just once */
798         if (uncore->init_done)
799                 return amd_uncore_ctx_init(uncore, cpu);
800 
801         num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
802         if (!num_counters)
803                 goto done;
804 
805         /* No grouping, single instance for a system */
806         uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
807         if (!uncore->pmus)
808                 goto done;
809 
810         /*
811          * For Family 17h and above, L3 cache counters are available instead
812          * of L2 cache counters. The PMUs are exported based on family as
813          * either L2 or L3.
814          */
815         pmu = &uncore->pmus[0];
816         strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
817                 sizeof(pmu->name));
818         pmu->num_counters = num_counters;
819         pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
820         pmu->rdpmc_base = RDPMC_BASE_LLC;
821         pmu->group = amd_uncore_ctx_gid(uncore, cpu);
822 
823         if (boot_cpu_data.x86 >= 0x17) {
824                 *l3_attr++ = &format_attr_event8.attr;
825                 *l3_attr++ = &format_attr_umask8.attr;
826                 *l3_attr++ = boot_cpu_data.x86 >= 0x19 ?
827                              &format_attr_threadmask2.attr :
828                              &format_attr_threadmask8.attr;
829         }
830 
831         pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
832         if (!pmu->ctx)
833                 goto done;
834 
835         pmu->pmu = (struct pmu) {
836                 .task_ctx_nr    = perf_invalid_context,
837                 .attr_groups    = amd_uncore_l3_attr_groups,
838                 .attr_update    = amd_uncore_l3_attr_update,
839                 .name           = pmu->name,
840                 .event_init     = amd_uncore_l3_event_init,
841                 .add            = amd_uncore_add,
842                 .del            = amd_uncore_del,
843                 .start          = amd_uncore_start,
844                 .stop           = amd_uncore_stop,
845                 .read           = amd_uncore_read,
846                 .capabilities   = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
847                 .module         = THIS_MODULE,
848         };
849 
850         if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
851                 free_percpu(pmu->ctx);
852                 pmu->ctx = NULL;
853                 goto done;
854         }
855 
856         pr_info("%d %s%s counters detected\n", pmu->num_counters,
857                 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?  "HYGON " : "",
858                 pmu->pmu.name);
859 
860         uncore->num_pmus = 1;
861 
862 done:
863         uncore->init_done = true;
864 
865         return amd_uncore_ctx_init(uncore, cpu);
866 }
867 
868 static int amd_uncore_umc_event_init(struct perf_event *event)
869 {
870         struct hw_perf_event *hwc = &event->hw;
871         int ret = amd_uncore_event_init(event);
872 
873         if (ret)
874                 return ret;
875 
876         hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC;
877 
878         return 0;
879 }
880 
881 static void amd_uncore_umc_start(struct perf_event *event, int flags)
882 {
883         struct hw_perf_event *hwc = &event->hw;
884 
885         if (flags & PERF_EF_RELOAD)
886                 wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
887 
888         hwc->state = 0;
889         wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
890         perf_event_update_userpage(event);
891 }
892 
893 static
894 void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
895 {
896         union cpuid_0x80000022_ebx ebx;
897         union amd_uncore_info info;
898         unsigned int eax, ecx, edx;
899 
900         if (pmu_version < 2)
901                 return;
902 
903         cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
904         info.split.aux_data = ecx;      /* stash active mask */
905         info.split.num_pmcs = ebx.split.num_umc_pmc;
906         info.split.gid = topology_logical_package_id(cpu);
907         info.split.cid = topology_logical_package_id(cpu);
908         *per_cpu_ptr(uncore->info, cpu) = info;
909 }
910 
911 static
912 int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
913 {
914         DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
915         u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
916         u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
917         union amd_uncore_info info;
918         struct amd_uncore_pmu *pmu;
919         int index = 0, gid, i;
920 
921         if (pmu_version < 2)
922                 return 0;
923 
924         /* Run just once */
925         if (uncore->init_done)
926                 return amd_uncore_ctx_init(uncore, cpu);
927 
928         /* Find unique groups */
929         for_each_online_cpu(i) {
930                 info = *per_cpu_ptr(uncore->info, i);
931                 gid = info.split.gid;
932                 if (test_bit(gid, gmask))
933                         continue;
934 
935                 __set_bit(gid, gmask);
936                 group_num_pmus[gid] = hweight32(info.split.aux_data);
937                 group_num_pmcs[gid] = info.split.num_pmcs;
938                 uncore->num_pmus += group_num_pmus[gid];
939         }
940 
941         uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus,
942                                GFP_KERNEL);
943         if (!uncore->pmus) {
944                 uncore->num_pmus = 0;
945                 goto done;
946         }
947 
948         for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
949                 for (i = 0; i < group_num_pmus[gid]; i++) {
950                         pmu = &uncore->pmus[index];
951                         snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
952                         pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
953                         pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
954                         pmu->rdpmc_base = -1;
955                         pmu->group = gid;
956 
957                         pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
958                         if (!pmu->ctx)
959                                 goto done;
960 
961                         pmu->pmu = (struct pmu) {
962                                 .task_ctx_nr    = perf_invalid_context,
963                                 .attr_groups    = amd_uncore_umc_attr_groups,
964                                 .name           = pmu->name,
965                                 .event_init     = amd_uncore_umc_event_init,
966                                 .add            = amd_uncore_add,
967                                 .del            = amd_uncore_del,
968                                 .start          = amd_uncore_umc_start,
969                                 .stop           = amd_uncore_stop,
970                                 .read           = amd_uncore_read,
971                                 .capabilities   = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
972                                 .module         = THIS_MODULE,
973                         };
974 
975                         if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
976                                 free_percpu(pmu->ctx);
977                                 pmu->ctx = NULL;
978                                 goto done;
979                         }
980 
981                         pr_info("%d %s counters detected\n", pmu->num_counters,
982                                 pmu->pmu.name);
983 
984                         index++;
985                 }
986         }
987 
988 done:
989         uncore->num_pmus = index;
990         uncore->init_done = true;
991 
992         return amd_uncore_ctx_init(uncore, cpu);
993 }
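
Each group (package) gets one PMU per active UMC instance, with the package's counters split evenly among them. As a hypothetical example (numbers are illustrative, not taken from any specific part): 32 UMC counters across 8 active UMCs yields amd_umc_0..amd_umc_7 with 4 counters each, and instance i's MSRs start at MSR_F19H_UMC_PERF_CTL + i * 4 * 2, since amd_uncore_add() lays counters out as CTL/CTR pairs. A sketch of the resulting MSR addressing:

#include <stdint.h>

/* MSR addressing implied by amd_uncore_add() for UMC instance "inst". */
static uint32_t umc_perf_ctl_msr(uint32_t umc_base, int inst,
				 int counters_per_umc, int idx)
{
	uint32_t msr_base = umc_base + inst * counters_per_umc * 2;

	return msr_base + 2 * idx;	/* matching PERF_CTR is this + 1 */
}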
994 
995 static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
996         /* UNCORE_TYPE_DF */
997         {
998                 .scan = amd_uncore_df_ctx_scan,
999                 .init = amd_uncore_df_ctx_init,
1000                 .move = amd_uncore_ctx_move,
1001                 .free = amd_uncore_ctx_free,
1002         },
1003         /* UNCORE_TYPE_L3 */
1004         {
1005                 .scan = amd_uncore_l3_ctx_scan,
1006                 .init = amd_uncore_l3_ctx_init,
1007                 .move = amd_uncore_ctx_move,
1008                 .free = amd_uncore_ctx_free,
1009         },
1010         /* UNCORE_TYPE_UMC */
1011         {
1012                 .scan = amd_uncore_umc_ctx_scan,
1013                 .init = amd_uncore_umc_ctx_init,
1014                 .move = amd_uncore_ctx_move,
1015                 .free = amd_uncore_ctx_free,
1016         },
1017 };
1018 
1019 static int __init amd_uncore_init(void)
1020 {
1021         struct amd_uncore *uncore;
1022         int ret = -ENODEV;
1023         int i;
1024 
1025         if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
1026             boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
1027                 return -ENODEV;
1028 
1029         if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
1030                 return -ENODEV;
1031 
1032         if (boot_cpu_has(X86_FEATURE_PERFMON_V2))
1033                 pmu_version = 2;
1034 
1035         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
1036                 uncore = &uncores[i];
1037 
1038                 BUG_ON(!uncore->scan);
1039                 BUG_ON(!uncore->init);
1040                 BUG_ON(!uncore->move);
1041                 BUG_ON(!uncore->free);
1042 
1043                 uncore->info = alloc_percpu(union amd_uncore_info);
1044                 if (!uncore->info) {
1045                         ret = -ENOMEM;
1046                         goto fail;
1047                 }
 1048         }
1049 
1050         /*
1051          * Install callbacks. Core will call them for each online cpu.
1052          */
1053         ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
1054                                 "perf/x86/amd/uncore:prepare",
1055                                 NULL, amd_uncore_cpu_dead);
1056         if (ret)
1057                 goto fail;
1058 
1059         ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
1060                                 "perf/x86/amd/uncore:starting",
1061                                 amd_uncore_cpu_starting, NULL);
1062         if (ret)
1063                 goto fail_prep;
1064 
1065         ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
1066                                 "perf/x86/amd/uncore:online",
1067                                 amd_uncore_cpu_online,
1068                                 amd_uncore_cpu_down_prepare);
1069         if (ret)
1070                 goto fail_start;
1071 
1072         return 0;
1073 
1074 fail_start:
1075         cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
1076 fail_prep:
1077         cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
1078 fail:
1079         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
1080                 uncore = &uncores[i];
1081                 if (uncore->info) {
1082                         free_percpu(uncore->info);
1083                         uncore->info = NULL;
1084                 }
1085         }
1086 
1087         return ret;
1088 }
1089 
1090 static void __exit amd_uncore_exit(void)
1091 {
1092         struct amd_uncore *uncore;
1093         struct amd_uncore_pmu *pmu;
1094         int i, j;
1095 
1096         cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE);
1097         cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
1098         cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
1099 
1100         for (i = 0; i < UNCORE_TYPE_MAX; i++) {
1101                 uncore = &uncores[i];
1102                 if (!uncore->info)
1103                         continue;
1104 
1105                 free_percpu(uncore->info);
1106                 uncore->info = NULL;
1107 
1108                 for (j = 0; j < uncore->num_pmus; j++) {
1109                         pmu = &uncore->pmus[j];
1110                         if (!pmu->ctx)
1111                                 continue;
1112 
1113                         perf_pmu_unregister(&pmu->pmu);
1114                         free_percpu(pmu->ctx);
1115                         pmu->ctx = NULL;
1116                 }
1117 
1118                 kfree(uncore->pmus);
1119                 uncore->pmus = NULL;
1120         }
1121 }
1122 
1123 module_init(amd_uncore_init);
1124 module_exit(amd_uncore_exit);
1125 
1126 MODULE_DESCRIPTION("AMD Uncore Driver");
1127 MODULE_LICENSE("GPL v2");
1128 
