/*
 * Performance events - AMD IBS
 *
 *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
 *
 *  For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <linux/ptrace.h>
#include <linux/syscore_ops.h>
#include <linux/sched/clock.h>

#include <asm/apic.h>

#include "../perf_event.h"

static u32 ibs_caps;

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)

#include <linux/kprobes.h>
#include <linux/hardirq.h>

#include <asm/nmi.h>
#include <asm/amd-ibs.h>

#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT


/*
 * IBS states:
 *
 * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken
 * and any further add()s must fail.
 *
 * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
 * complicated by the fact that the IBS hardware can send late NMIs (ie. after
 * we've cleared the EN bit).
 *
 * In order to consume these late NMIs we have the STOPPED state, any NMI that
 * happens after we've cleared the EN state will clear this bit and report the
 * NMI handled (this is fundamentally racy in the face of multiple NMI sources,
 * someone else can consume our BIT and our NMI will go unhandled).
 *
 * And since we cannot set/clear this separate bit together with the EN bit,
 * there are races; if we cleared STARTED early, an NMI could land in
 * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
 * could happen if the period is small enough), and consume our STOPPED bit
 * and trigger streams of unhandled NMIs.
 *
 * If, however, we clear STARTED late, an NMI can hit between clearing the
 * EN bit and clearing STARTED, still see STARTED set and process the event.
 * If this event will have the VALID bit clear, we bail properly, but this
 * is not a given. With VALID set we can end up calling pmu::stop() again
 * (the throttle logic) and trigger the WARNs in there.
 *
 * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
 * nesting, and clear STARTED late, so that we have a well defined state over
 * the clearing of the EN bit.
 *
 * XXX: we could probably be using !atomic bitops for all this.
 */

enum ibs_states {
	IBS_ENABLED	= 0,
	IBS_STARTED	= 1,
	IBS_STOPPING	= 2,
	IBS_STOPPED	= 3,

	IBS_MAX_STATES,
};

struct cpu_perf_ibs {
	struct perf_event	*event;
	unsigned long		state[BITS_TO_LONGS(IBS_MAX_STATES)];
};

struct perf_ibs {
	struct pmu			pmu;
	unsigned int			msr;
	u64				config_mask;
	u64				cnt_mask;
	u64				enable_mask;
	u64				valid_mask;
	u64				max_period;
	unsigned long			offset_mask[1];
	int				offset_max;
	unsigned int			fetch_count_reset_broken : 1;
	unsigned int			fetch_ignore_if_zero_rip : 1;
	struct cpu_perf_ibs __percpu	*pcpu;

	u64				(*get_count)(u64 config);
};

static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int overflow = 0;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (unlikely(left < (s64)min)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	/*
	 * If the hw period that triggers the sw overflow is too short
	 * we might hit the irq handler. This biases the results.
	 * Thus we shorten the next-to-last period and set the last
	 * period to the max period.
	 */
	if (left > max) {
		left -= max;
		if (left > max)
			left = max;
		else if (left < min)
			left = min;
	}

	*hw_period = (u64)left;

	return overflow;
}

static int
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - width;
	u64 prev_raw_count;
	u64 delta;

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
	prev_raw_count = local64_read(&hwc->prev_count);
	if (!local64_try_cmpxchg(&hwc->prev_count,
				 &prev_raw_count, new_raw_count))
		return 0;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return 1;
}
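
/*
 * Illustrative note (not from the original source): with width == 64, as
 * used by the IBS code below, shift is 0 and the delta above is a plain
 * 64-bit subtraction.  For a narrower counter, e.g. width == 27, shift is
 * 37 and ((new << 37) - (prev << 37)) >> 37 yields (new - prev) modulo
 * 2^27, which handles a counter that wrapped between two reads.
 */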

static struct perf_ibs perf_ibs_fetch;
static struct perf_ibs perf_ibs_op;

static struct perf_ibs *get_ibs_pmu(int type)
{
	if (perf_ibs_fetch.pmu.type == type)
		return &perf_ibs_fetch;
	if (perf_ibs_op.pmu.type == type)
		return &perf_ibs_op;
	return NULL;
}

/*
 * core pmu config -> IBS config
 *
 *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
 *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
 *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
 *
 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
 * MSRC001_1033) is used to select either cycle or micro-ops counting
 * mode.
 */
static int core_pmu_ibs_config(struct perf_event *event, u64 *config)
{
	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		switch (event->attr.config) {
		case PERF_COUNT_HW_CPU_CYCLES:
			*config = 0;
			return 0;
		}
		break;
	case PERF_TYPE_RAW:
		switch (event->attr.config) {
		case 0x0076:
			*config = 0;
			return 0;
		case 0x00C1:
			*config = IBS_OP_CNT_CTL;
			return 0;
		}
		break;
	default:
		return -ENOENT;
	}

	return -EOPNOTSUPP;
}

/*
 * The rip of IBS samples has skid 0. Thus, IBS supports precise
 * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
 * rip is invalid when IBS was not able to record the rip correctly.
 * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
 */
int forward_event_to_ibs(struct perf_event *event)
{
	u64 config = 0;

	if (!event->attr.precise_ip || event->attr.precise_ip > 2)
		return -EOPNOTSUPP;

	if (!core_pmu_ibs_config(event, &config)) {
		event->attr.type = perf_ibs_op.pmu.type;
		event->attr.config = config;
	}
	return -ENOENT;
}

/*
 * Grouping of IBS events is not possible since IBS can have only
 * one event active at any point in time.
 */
static int validate_group(struct perf_event *event)
{
	struct perf_event *sibling;

	if (event->group_leader == event)
		return 0;

	if (event->group_leader->pmu == event->pmu)
		return -EINVAL;

	for_each_sibling_event(sibling, event->group_leader) {
		if (sibling->pmu == event->pmu)
			return -EINVAL;
	}
	return 0;
}

static int perf_ibs_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs;
	u64 max_cnt, config;
	int ret;

	perf_ibs = get_ibs_pmu(event->attr.type);
	if (!perf_ibs)
		return -ENOENT;

	config = event->attr.config;

	if (event->pmu != &perf_ibs->pmu)
		return -ENOENT;

	if (config & ~perf_ibs->config_mask)
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	ret = validate_group(event);
	if (ret)
		return ret;

	if (hwc->sample_period) {
		if (config & perf_ibs->cnt_mask)
			/* raw max_cnt may not be set */
			return -EINVAL;
		if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
			/*
			 * The lower 4 bits cannot be set in the IBS max cnt,
			 * but allow it here in case we adjust the sample
			 * period to set a frequency.
			 */
			return -EINVAL;
		hwc->sample_period &= ~0x0FULL;
		if (!hwc->sample_period)
			hwc->sample_period = 0x10;
	} else {
		max_cnt = config & perf_ibs->cnt_mask;
		config &= ~perf_ibs->cnt_mask;
		event->attr.sample_period = max_cnt << 4;
		hwc->sample_period = event->attr.sample_period;
	}

	if (!hwc->sample_period)
		return -EINVAL;

	/*
	 * If we modify hwc->sample_period, we also need to update
	 * hwc->last_period and hwc->period_left.
	 */
	hwc->last_period = hwc->sample_period;
	local64_set(&hwc->period_left, hwc->sample_period);

	hwc->config_base = perf_ibs->msr;
	hwc->config = config;

	return 0;
}
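
/*
 * Illustrative example (not from the original source): the hardware MaxCnt
 * fields count in units of 16, which is why perf_ibs_init() masks off the
 * low 4 bits of the sample period and why the period is shifted right by 4
 * before being written to the control MSR.  A requested sample_period of
 * 0x1000f is masked to 0x10000 and programmed as MaxCnt = 0x1000, i.e. an
 * effective period of 0x10000 ops/fetches between NMIs.
 */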

static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
			       struct hw_perf_event *hwc, u64 *period)
{
	int overflow;

	/* ignore lower 4 bits in min count: */
	overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
	local64_set(&hwc->prev_count, 0);

	return overflow;
}

static u64 get_ibs_fetch_count(u64 config)
{
	union ibs_fetch_ctl fetch_ctl = (union ibs_fetch_ctl)config;

	return fetch_ctl.fetch_cnt << 4;
}

static u64 get_ibs_op_count(u64 config)
{
	union ibs_op_ctl op_ctl = (union ibs_op_ctl)config;
	u64 count = 0;

	/*
	 * If the internal 27-bit counter rolled over, the count is MaxCnt
	 * and the lower 7 bits of CurCnt are randomized.
	 * Otherwise CurCnt has the full 27-bit current counter value.
	 */
	if (op_ctl.op_val) {
		count = op_ctl.opmaxcnt << 4;
		if (ibs_caps & IBS_CAPS_OPCNTEXT)
			count += op_ctl.opmaxcnt_ext << 20;
	} else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
		count = op_ctl.opcurcnt;
	}

	return count;
}
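
/*
 * Illustrative example (not from the original source): with
 * IBS_CAPS_OPCNTEXT and IbsOpVal set, IbsOpMaxCnt = 0x1000 and
 * IbsOpMaxCntExt = 0x1 decode to (0x1000 << 4) + (0x1 << 20) = 0x110000
 * ops counted for the sample that just rolled over.
 */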

static void
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
		      u64 *config)
{
	u64 count = perf_ibs->get_count(*config);

	/*
	 * Set width to 64 since we do not overflow on max width but
	 * instead on max count. In perf_ibs_set_period() we clear
	 * prev count manually on overflow.
	 */
	while (!perf_event_try_update(event, count, 64)) {
		rdmsrl(event->hw.config_base, *config);
		count = perf_ibs->get_count(*config);
	}
}

static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
					 struct hw_perf_event *hwc, u64 config)
{
	u64 tmp = hwc->config | config;

	if (perf_ibs->fetch_count_reset_broken)
		wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);

	wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
}

/*
 * Erratum #420 Instruction-Based Sampling Engine May Generate
 * Interrupt that Cannot Be Cleared:
 *
 * Must clear counter mask first, then clear the enable bit. See
 * Revision Guide for AMD Family 10h Processors, Publication #41322.
 */
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
					  struct hw_perf_event *hwc, u64 config)
{
	config &= ~perf_ibs->cnt_mask;
	if (boot_cpu_data.x86 == 0x10)
		wrmsrl(hwc->config_base, config);
	config &= ~perf_ibs->enable_mask;
	wrmsrl(hwc->config_base, config);
}

/*
 * We cannot restore the ibs pmu state, so we always need to update
 * the event while stopping it and then reset the state when starting
 * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
 * perf_ibs_start()/perf_ibs_stop() and instead always do it.
 */
static void perf_ibs_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 period, config = 0;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;

	perf_ibs_set_period(perf_ibs, hwc, &period);
	if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
		config |= period & IBS_OP_MAX_CNT_EXT_MASK;
		period &= ~IBS_OP_MAX_CNT_EXT_MASK;
	}
	config |= period >> 4;

	/*
	 * Set STARTED before enabling the hardware, such that a subsequent NMI
	 * must observe it.
	 */
	set_bit(IBS_STARTED, pcpu->state);
	clear_bit(IBS_STOPPING, pcpu->state);
	perf_ibs_enable_event(perf_ibs, hwc, config);

	perf_event_update_userpage(event);
}

static void perf_ibs_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 config;
	int stopping;

	if (test_and_set_bit(IBS_STOPPING, pcpu->state))
		return;

	stopping = test_bit(IBS_STARTED, pcpu->state);

	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
		return;

	rdmsrl(hwc->config_base, config);

	if (stopping) {
		/*
		 * Set STOPPED before disabling the hardware, such that it
		 * must be visible to NMIs the moment we clear the EN bit,
		 * at which point we can generate an !VALID sample which
		 * we need to consume.
		 */
		set_bit(IBS_STOPPED, pcpu->state);
		perf_ibs_disable_event(perf_ibs, hwc, config);
		/*
		 * Clear STARTED after disabling the hardware; if it were
		 * cleared before, an NMI hitting after the clear but before
		 * clearing the EN bit might think it a spurious NMI and not
		 * handle it.
		 *
		 * Clearing it after, however, creates the problem of the NMI
		 * handler seeing STARTED but not having a valid sample.
		 */
		clear_bit(IBS_STARTED, pcpu->state);
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	/*
	 * Clear valid bit to not count rollovers on update, rollovers
	 * are only updated in the irq handler.
	 */
	config &= ~perf_ibs->valid_mask;

	perf_ibs_event_update(perf_ibs, event, &config);
	hwc->state |= PERF_HES_UPTODATE;
}

static int perf_ibs_add(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
		return -ENOSPC;

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	pcpu->event = event;

	if (flags & PERF_EF_START)
		perf_ibs_start(event, PERF_EF_RELOAD);

	return 0;
}

static void perf_ibs_del(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
		return;

	perf_ibs_stop(event, PERF_EF_UPDATE);

	pcpu->event = NULL;

	perf_event_update_userpage(event);
}

static void perf_ibs_read(struct perf_event *event) { }

/*
 * We need to initialize with an empty group if all attributes in the
 * group are dynamic.
 */
static struct attribute *attrs_empty[] = {
	NULL,
};

static struct attribute_group empty_format_group = {
	.name = "format",
	.attrs = attrs_empty,
};

static struct attribute_group empty_caps_group = {
	.name = "caps",
	.attrs = attrs_empty,
};

static const struct attribute_group *empty_attr_groups[] = {
	&empty_format_group,
	&empty_caps_group,
	NULL,
};

PMU_FORMAT_ATTR(rand_en,	"config:57");
PMU_FORMAT_ATTR(cnt_ctl,	"config:19");
PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59");
PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16");
PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1");

static umode_t
zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0;
}

static struct attribute *rand_en_attrs[] = {
	&format_attr_rand_en.attr,
	NULL,
};

static struct attribute *fetch_l3missonly_attrs[] = {
	&fetch_l3missonly.attr.attr,
	NULL,
};

static struct attribute *zen4_ibs_extensions_attrs[] = {
	&zen4_ibs_extensions.attr.attr,
	NULL,
};

static struct attribute_group group_rand_en = {
	.name = "format",
	.attrs = rand_en_attrs,
};

static struct attribute_group group_fetch_l3missonly = {
	.name = "format",
	.attrs = fetch_l3missonly_attrs,
	.is_visible = zen4_ibs_extensions_is_visible,
};

static struct attribute_group group_zen4_ibs_extensions = {
	.name = "caps",
	.attrs = zen4_ibs_extensions_attrs,
	.is_visible = zen4_ibs_extensions_is_visible,
};

static const struct attribute_group *fetch_attr_groups[] = {
	&group_rand_en,
	&empty_caps_group,
	NULL,
};

static const struct attribute_group *fetch_attr_update[] = {
	&group_fetch_l3missonly,
	&group_zen4_ibs_extensions,
	NULL,
};
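
/*
 * Illustrative usage (not from the original source): the format attributes
 * above expose raw config bits as named parameters under
 * /sys/bus/event_source/devices/ibs_fetch/format/ and .../ibs_op/format/,
 * so the perf tool can set them symbolically, e.g.
 *
 *   perf record -a -e ibs_fetch/rand_en=1/ ...	# randomize fetch tagging
 *   perf record -a -e ibs_op/cnt_ctl=1/ ...	# count dispatched micro-ops
 *
 * provided the corresponding IBS capability bits are set (the is_visible
 * callbacks hide unsupported parameters).
 */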

static umode_t
cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0;
}

static struct attribute *cnt_ctl_attrs[] = {
	&format_attr_cnt_ctl.attr,
	NULL,
};

static struct attribute *op_l3missonly_attrs[] = {
	&op_l3missonly.attr.attr,
	NULL,
};

static struct attribute_group group_cnt_ctl = {
	.name = "format",
	.attrs = cnt_ctl_attrs,
	.is_visible = cnt_ctl_is_visible,
};

static struct attribute_group group_op_l3missonly = {
	.name = "format",
	.attrs = op_l3missonly_attrs,
	.is_visible = zen4_ibs_extensions_is_visible,
};

static const struct attribute_group *op_attr_update[] = {
	&group_cnt_ctl,
	&group_op_l3missonly,
	&group_zen4_ibs_extensions,
	NULL,
};

static struct perf_ibs perf_ibs_fetch = {
	.pmu = {
		.task_ctx_nr	= perf_hw_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
	},
	.msr			= MSR_AMD64_IBSFETCHCTL,
	.config_mask		= IBS_FETCH_CONFIG_MASK,
	.cnt_mask		= IBS_FETCH_MAX_CNT,
	.enable_mask		= IBS_FETCH_ENABLE,
	.valid_mask		= IBS_FETCH_VAL,
	.max_period		= IBS_FETCH_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,

	.get_count		= get_ibs_fetch_count,
};

static struct perf_ibs perf_ibs_op = {
	.pmu = {
		.task_ctx_nr	= perf_hw_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
	},
	.msr			= MSR_AMD64_IBSOPCTL,
	.config_mask		= IBS_OP_CONFIG_MASK,
	.cnt_mask		= IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
				  IBS_OP_CUR_CNT_RAND,
	.enable_mask		= IBS_OP_ENABLE,
	.valid_mask		= IBS_OP_VAL,
	.max_period		= IBS_OP_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,

	.get_count		= get_ibs_op_count,
};

static void perf_ibs_get_mem_op(union ibs_op_data3 *op_data3,
				struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->mem_op = PERF_MEM_OP_NA;

	if (op_data3->ld_op)
		data_src->mem_op = PERF_MEM_OP_LOAD;
	else if (op_data3->st_op)
		data_src->mem_op = PERF_MEM_OP_STORE;
}

/*
 * Processors having CPUID_Fn8000001B_EAX[11] aka IBS_CAPS_ZEN4 have
 * finer-grained DataSrc encodings. Others have coarse ones.
 */
static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
{
	if (ibs_caps & IBS_CAPS_ZEN4)
		return (op_data2->data_src_hi << 3) | op_data2->data_src_lo;

	return op_data2->data_src_lo;
}
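
/*
 * Illustrative note (not from the original source): on Zen4 the DataSrc
 * value is a 5-bit index built from two IBS_OP_DATA2 fields, e.g.
 * data_src_hi = 0x1 and data_src_lo = 0x2 give (0x1 << 3) | 0x2 = 0xa,
 * selecting an entry of the 32-entry g_zen4_data_src[] table below.
 * Pre-Zen4 parts only provide the 3-bit data_src_lo, indexing g_data_src[].
 */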

#define	L(x)		(PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
#define	LN(x)		PERF_MEM_S(LVLNUM, x)
#define	REM		PERF_MEM_S(REMOTE, REMOTE)
#define	HOPS(x)		PERF_MEM_S(HOPS, x)

static u64 g_data_src[8] = {
	[IBS_DATA_SRC_LOC_CACHE]	= L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0),
	[IBS_DATA_SRC_DRAM]		= L(LOC_RAM) | LN(RAM),
	[IBS_DATA_SRC_REM_CACHE]	= L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
	[IBS_DATA_SRC_IO]		= L(IO) | LN(IO),
};

#define RMT_NODE_BITS			(1 << IBS_DATA_SRC_DRAM)
#define RMT_NODE_APPLICABLE(x)		(RMT_NODE_BITS & (1 << x))

static u64 g_zen4_data_src[32] = {
	[IBS_DATA_SRC_EXT_LOC_CACHE]		= L(L3) | LN(L3),
	[IBS_DATA_SRC_EXT_NEAR_CCX_CACHE]	= L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0),
	[IBS_DATA_SRC_EXT_DRAM]			= L(LOC_RAM) | LN(RAM),
	[IBS_DATA_SRC_EXT_FAR_CCX_CACHE]	= L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
	[IBS_DATA_SRC_EXT_PMEM]			= LN(PMEM),
	[IBS_DATA_SRC_EXT_IO]			= L(IO) | LN(IO),
	[IBS_DATA_SRC_EXT_EXT_MEM]		= LN(CXL),
};

#define ZEN4_RMT_NODE_BITS		((1 << IBS_DATA_SRC_EXT_DRAM) | \
					 (1 << IBS_DATA_SRC_EXT_PMEM) | \
					 (1 << IBS_DATA_SRC_EXT_EXT_MEM))
#define ZEN4_RMT_NODE_APPLICABLE(x)	(ZEN4_RMT_NODE_BITS & (1 << x))

static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
				  union ibs_op_data3 *op_data3,
				  struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;
	u8 ibs_data_src = perf_ibs_data_src(op_data2);

	data_src->mem_lvl = 0;
	data_src->mem_lvl_num = 0;

	/*
	 * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
	 * memory accesses. So, check DcUcMemAcc bit early.
	 */
	if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
		return L(UNC) | LN(UNC);

	/* L1 Hit */
	if (op_data3->dc_miss == 0)
		return L(L1) | LN(L1);

	/* L2 Hit */
	if (op_data3->l2_miss == 0) {
		/* Erratum #1293 */
		if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
		    !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
			return L(L2) | LN(L2);
	}

	/*
	 * OP_DATA2 is valid only for load ops. Skip all checks which
	 * use OP_DATA2[DataSrc].
	 */
	if (data_src->mem_op != PERF_MEM_OP_LOAD)
		goto check_mab;

	if (ibs_caps & IBS_CAPS_ZEN4) {
		u64 val = g_zen4_data_src[ibs_data_src];

		if (!val)
			goto check_mab;

		/* HOPS_1 because IBS doesn't provide remote socket detail */
		if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
			if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
				val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
			else
				val |= REM | HOPS(1);
		}

		return val;
	} else {
		u64 val = g_data_src[ibs_data_src];

		if (!val)
			goto check_mab;

		/* HOPS_1 because IBS doesn't provide remote socket detail */
		if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
			if (ibs_data_src == IBS_DATA_SRC_DRAM)
				val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
			else
				val |= REM | HOPS(1);
		}

		return val;
	}

check_mab:
	/*
	 * MAB (Miss Address Buffer) Hit. MAB keeps track of outstanding
	 * DC misses. However, such data may come from any level in mem
	 * hierarchy. IBS provides detail about both the MAB and the actual
	 * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
	 * MAB only when IBS fails to provide DataSrc.
	 */
	if (op_data3->dc_miss_no_mab_alloc)
		return L(LFB) | LN(LFB);

	/* Don't set HIT with NA */
	return PERF_MEM_S(LVL, NA) | LN(NA);
}

static bool perf_ibs_cache_hit_st_valid(void)
{
	/* 0: Uninitialized, 1: Valid, -1: Invalid */
	static int cache_hit_st_valid;

	if (unlikely(!cache_hit_st_valid)) {
		if (boot_cpu_data.x86 == 0x19 &&
		    (boot_cpu_data.x86_model <= 0xF ||
		    (boot_cpu_data.x86_model >= 0x20 &&
		     boot_cpu_data.x86_model <= 0x5F))) {
			cache_hit_st_valid = -1;
		} else {
			cache_hit_st_valid = 1;
		}
	}

	return cache_hit_st_valid == 1;
}

static void perf_ibs_get_mem_snoop(union ibs_op_data2 *op_data2,
				   struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;
	u8 ibs_data_src;

	data_src->mem_snoop = PERF_MEM_SNOOP_NA;

	if (!perf_ibs_cache_hit_st_valid() ||
	    data_src->mem_op != PERF_MEM_OP_LOAD ||
	    data_src->mem_lvl & PERF_MEM_LVL_L1 ||
	    data_src->mem_lvl & PERF_MEM_LVL_L2 ||
	    op_data2->cache_hit_st)
		return;

	ibs_data_src = perf_ibs_data_src(op_data2);

	if (ibs_caps & IBS_CAPS_ZEN4) {
		if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE ||
		    ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ||
		    ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE)
			data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
	} else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
	}
}

static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3,
				 struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->mem_dtlb = PERF_MEM_TLB_NA;

	if (!op_data3->dc_lin_addr_valid)
		return;

	if (!op_data3->dc_l1tlb_miss) {
		data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT;
		return;
	}

	if (!op_data3->dc_l2tlb_miss) {
		data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT;
		return;
	}

	data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS;
}

static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3,
				  struct perf_sample_data *data)
{
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->mem_lock = PERF_MEM_LOCK_NA;

	if (op_data3->dc_locked_op)
		data_src->mem_lock = PERF_MEM_LOCK_LOCKED;
}

#define ibs_op_msr_idx(msr)	(msr - MSR_AMD64_IBSOPCTL)
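
/*
 * Illustrative note (not from the original source): the IBS op MSRs start
 * at MSR_AMD64_IBSOPCTL and, assuming the contiguous register layout
 * implied by MSR_AMD64_IBSOP_REG_MASK, are read into ibs_data.regs[] back
 * to back by the NMI handler, so ibs_op_msr_idx() maps an MSR to its slot
 * in that array, e.g. regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)] holds the
 * IBSOPDATA3 value captured for the sample.
 */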

static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
				  struct perf_sample_data *data,
				  union ibs_op_data2 *op_data2,
				  union ibs_op_data3 *op_data3)
{
	union perf_mem_data_src *data_src = &data->data_src;

	data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
	perf_ibs_get_mem_snoop(op_data2, data);
	perf_ibs_get_tlb_lvl(op_data3, data);
	perf_ibs_get_mem_lock(op_data3, data);
}

static __u64 perf_ibs_get_op_data2(struct perf_ibs_data *ibs_data,
				   union ibs_op_data3 *op_data3)
{
	__u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)];

	/* Erratum #1293 */
	if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF &&
	    (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
		/*
		 * OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode.
		 * DataSrc=0 is 'No valid status' and RmtNode is invalid when
		 * DataSrc=0.
		 */
		val = 0;
	}
	return val;
}

static void perf_ibs_parse_ld_st_data(__u64 sample_type,
				      struct perf_ibs_data *ibs_data,
				      struct perf_sample_data *data)
{
	union ibs_op_data3 op_data3;
	union ibs_op_data2 op_data2;
	union ibs_op_data op_data;

	data->data_src.val = PERF_MEM_NA;
	op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];

	perf_ibs_get_mem_op(&op_data3, data);
	if (data->data_src.mem_op != PERF_MEM_OP_LOAD &&
	    data->data_src.mem_op != PERF_MEM_OP_STORE)
		return;

	op_data2.val = perf_ibs_get_op_data2(ibs_data, &op_data3);

	if (sample_type & PERF_SAMPLE_DATA_SRC) {
		perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3);
		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
	}

	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE && op_data3.dc_miss &&
	    data->data_src.mem_op == PERF_MEM_OP_LOAD) {
		op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)];

		if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
			data->weight.var1_dw = op_data3.dc_miss_lat;
			data->weight.var2_w = op_data.tag_to_ret_ctr;
		} else if (sample_type & PERF_SAMPLE_WEIGHT) {
			data->weight.full = op_data3.dc_miss_lat;
		}
		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
	}

	if (sample_type & PERF_SAMPLE_ADDR && op_data3.dc_lin_addr_valid) {
		data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)];
		data->sample_flags |= PERF_SAMPLE_ADDR;
	}

	if (sample_type & PERF_SAMPLE_PHYS_ADDR && op_data3.dc_phy_addr_valid) {
		data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)];
		data->sample_flags |= PERF_SAMPLE_PHYS_ADDR;
	}
}

static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type,
				   int check_rip)
{
	if (sample_type & PERF_SAMPLE_RAW ||
	    (perf_ibs == &perf_ibs_op &&
	     (sample_type & PERF_SAMPLE_DATA_SRC ||
	      sample_type & PERF_SAMPLE_WEIGHT_TYPE ||
	      sample_type & PERF_SAMPLE_ADDR ||
	      sample_type & PERF_SAMPLE_PHYS_ADDR)))
		return perf_ibs->offset_max;
	else if (check_rip)
		return 3;
	return 1;
}
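
/*
 * Illustrative note (not from the original source): the return value above
 * bounds how many IBS MSRs the NMI handler reads.  1 means only the control
 * register, 3 additionally covers IbsOpRip and IbsOpData so the RIP-invalid
 * check can be performed, and offset_max pulls in the full register set when
 * raw or memory-profiling sample data was requested.
 */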

static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	struct perf_event *event = pcpu->event;
	struct hw_perf_event *hwc;
	struct perf_sample_data data;
	struct perf_raw_record raw;
	struct pt_regs regs;
	struct perf_ibs_data ibs_data;
	int offset, size, check_rip, offset_max, throttle = 0;
	unsigned int msr;
	u64 *buf, *config, period, new_config = 0;

	if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
		/*
		 * Catch spurious interrupts after stopping IBS: After
		 * disabling IBS there could still be incoming NMIs
		 * with samples that even have the valid bit cleared.
		 * Mark all these NMIs as handled.
		 */
		if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
			return 1;

		return 0;
	}

	if (WARN_ON_ONCE(!event))
		goto fail;

	hwc = &event->hw;
	msr = hwc->config_base;
	buf = ibs_data.regs;
	rdmsrl(msr, *buf);
	if (!(*buf++ & perf_ibs->valid_mask))
		goto fail;

	config = &ibs_data.regs[0];
	perf_ibs_event_update(perf_ibs, event, config);
	perf_sample_data_init(&data, 0, hwc->last_period);
	if (!perf_ibs_set_period(perf_ibs, hwc, &period))
		goto out;	/* no sw counter overflow */

	ibs_data.caps = ibs_caps;
	size = 1;
	offset = 1;
	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));

	offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip);

	do {
		rdmsrl(msr + offset, *buf++);
		size++;
		offset = find_next_bit(perf_ibs->offset_mask,
				       perf_ibs->offset_max,
				       offset + 1);
	} while (offset < offset_max);
	/*
	 * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
	 * depending on their availability.
	 * Can't add to offset_max as they are staggered.
	 */
	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		if (perf_ibs == &perf_ibs_op) {
			if (ibs_caps & IBS_CAPS_BRNTRGT) {
				rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
				size++;
			}
			if (ibs_caps & IBS_CAPS_OPDATA4) {
				rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
				size++;
			}
		}
		if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) {
			rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++);
			size++;
		}
	}
	ibs_data.size = sizeof(u64) * size;

	regs = *iregs;
	if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
		regs.flags &= ~PERF_EFLAGS_EXACT;
	} else {
		/* Workaround for erratum #1197 */
		if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1]))
			goto out;

		set_linear_ip(&regs, ibs_data.regs[1]);
		regs.flags |= PERF_EFLAGS_EXACT;
	}

	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw = (struct perf_raw_record){
			.frag = {
				.size = sizeof(u32) + ibs_data.size,
				.data = ibs_data.data,
			},
		};
		perf_sample_save_raw_data(&data, &raw);
	}

	if (perf_ibs == &perf_ibs_op)
		perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data);

	/*
	 * rip recorded by IbsOpRip will not be consistent with rsp and rbp
	 * recorded as part of interrupt regs. Thus we need to use rip from
	 * interrupt regs while unwinding call stack.
	 */
	if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
		perf_sample_save_callchain(&data, event, iregs);

	throttle = perf_event_overflow(event, &data, &regs);
out:
	if (throttle) {
		perf_ibs_stop(event, 0);
	} else {
		if (perf_ibs == &perf_ibs_op) {
			if (ibs_caps & IBS_CAPS_OPCNTEXT) {
				new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
				period &= ~IBS_OP_MAX_CNT_EXT_MASK;
			}
			if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL))
				new_config |= *config & IBS_OP_CUR_CNT_RAND;
		}
		new_config |= period >> 4;

		perf_ibs_enable_event(perf_ibs, hwc, new_config);
	}

	perf_event_update_userpage(event);

	return 1;
}

static int
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	u64 stamp = sched_clock();
	int handled = 0;

	handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
	handled += perf_ibs_handle_irq(&perf_ibs_op, regs);

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	perf_sample_event_took(sched_clock() - stamp);

	return handled;
}
NOKPROBE_SYMBOL(perf_ibs_nmi_handler);

static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
{
	struct cpu_perf_ibs __percpu *pcpu;
	int ret;

	pcpu = alloc_percpu(struct cpu_perf_ibs);
	if (!pcpu)
		return -ENOMEM;

	perf_ibs->pcpu = pcpu;

	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
	if (ret) {
		perf_ibs->pcpu = NULL;
		free_percpu(pcpu);
	}

	return ret;
}

static __init int perf_ibs_fetch_init(void)
{
	/*
	 * Some chips fail to reset the fetch count when it is written; instead
	 * they need a 0-1 transition of IbsFetchEn.
	 */
	if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
		perf_ibs_fetch.fetch_count_reset_broken = 1;

	if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10)
		perf_ibs_fetch.fetch_ignore_if_zero_rip = 1;

	if (ibs_caps & IBS_CAPS_ZEN4)
		perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY;

	perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups;
	perf_ibs_fetch.pmu.attr_update = fetch_attr_update;

	return perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
}

static __init int perf_ibs_op_init(void)
{
	if (ibs_caps & IBS_CAPS_OPCNT)
		perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;

	if (ibs_caps & IBS_CAPS_OPCNTEXT) {
		perf_ibs_op.max_period	|= IBS_OP_MAX_CNT_EXT_MASK;
		perf_ibs_op.config_mask	|= IBS_OP_MAX_CNT_EXT_MASK;
		perf_ibs_op.cnt_mask	|= IBS_OP_MAX_CNT_EXT_MASK;
	}

	if (ibs_caps & IBS_CAPS_ZEN4)
		perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY;

	perf_ibs_op.pmu.attr_groups = empty_attr_groups;
	perf_ibs_op.pmu.attr_update = op_attr_update;

	return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
}

static __init int perf_event_ibs_init(void)
{
	int ret;

	ret = perf_ibs_fetch_init();
	if (ret)
		return ret;

	ret = perf_ibs_op_init();
	if (ret)
		goto err_op;

	ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
	if (ret)
		goto err_nmi;

	pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
	return 0;

err_nmi:
	perf_pmu_unregister(&perf_ibs_op.pmu);
	free_percpu(perf_ibs_op.pcpu);
	perf_ibs_op.pcpu = NULL;
err_op:
	perf_pmu_unregister(&perf_ibs_fetch.pmu);
	free_percpu(perf_ibs_fetch.pcpu);
	perf_ibs_fetch.pcpu = NULL;

	return ret;
}

#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */

static __init int perf_event_ibs_init(void)
{
	return 0;
}

#endif

/* IBS - apic initialization, for perf and oprofile */

static __init u32 __get_ibs_caps(void)
{
	u32 caps;
	unsigned int max_level;

	if (!boot_cpu_has(X86_FEATURE_IBS))
		return 0;

	/* check IBS cpuid feature flags */
	max_level = cpuid_eax(0x80000000);
	if (max_level < IBS_CPUID_FEATURES)
		return IBS_CAPS_DEFAULT;

	caps = cpuid_eax(IBS_CPUID_FEATURES);
	if (!(caps & IBS_CAPS_AVAIL))
		/* cpuid flags not valid */
		return IBS_CAPS_DEFAULT;

	return caps;
}

u32 get_ibs_caps(void)
{
	return ibs_caps;
}

EXPORT_SYMBOL(get_ibs_caps);

static inline int get_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
}

static inline int put_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, 0, 1);
}
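
/*
 * Illustrative note (not from the original source): both helpers above
 * treat a zero return from setup_APIC_eilvt() as success.  get_eilvt()
 * claims the APIC extended LVT entry at @offset as a masked NMI slot,
 * while put_eilvt() releases it again by programming a masked dummy
 * entry, so another user may reserve that offset later.
 */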

/*
 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
 */
static inline int ibs_eilvt_valid(void)
{
	int offset;
	u64 val;
	int valid = 0;

	preempt_disable();

	rdmsrl(MSR_AMD64_IBSCTL, val);
	offset = val & IBSCTL_LVT_OFFSET_MASK;

	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	if (!get_eilvt(offset)) {
		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	valid = 1;
out:
	preempt_enable();

	return valid;
}

static int setup_ibs_ctl(int ibs_eilvt_off)
{
	struct pci_dev *cpu_cfg;
	int nodes;
	u32 value = 0;

	nodes = 0;
	cpu_cfg = NULL;
	do {
		cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
					 PCI_DEVICE_ID_AMD_10H_NB_MISC,
					 cpu_cfg);
		if (!cpu_cfg)
			break;
		++nodes;
		pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
				       | IBSCTL_LVT_OFFSET_VALID);
		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
			pci_dev_put(cpu_cfg);
			pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
				 value);
			return -EINVAL;
		}
	} while (1);

	if (!nodes) {
		pr_debug("No CPU node configured for IBS\n");
		return -ENODEV;
	}

	return 0;
}

/*
 * This runs only on the current cpu. We try to find an LVT offset and
 * setup the local APIC. For this we must disable preemption. On
 * success we initialize all nodes with this offset. This then updates
 * the offset in the per-node IBS_CTL msr. The per-core APIC setup of
 * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
 * uses the new offset.
 */
static void force_ibs_eilvt_setup(void)
{
	int offset;
	int ret;

	preempt_disable();
	/* find the next free available EILVT entry, skip offset 0 */
	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
		if (get_eilvt(offset))
			break;
	}
	preempt_enable();

	if (offset == APIC_EILVT_NR_MAX) {
		pr_debug("No EILVT entry available\n");
		return;
	}

	ret = setup_ibs_ctl(offset);
	if (ret)
		goto out;

	if (!ibs_eilvt_valid())
		goto out;

	pr_info("LVT offset %d assigned\n", offset);

	return;
out:
	preempt_disable();
	put_eilvt(offset);
	preempt_enable();
	return;
}

static void ibs_eilvt_setup(void)
{
	/*
	 * Force LVT offset assignment for family 10h: The offsets are
	 * not assigned by the BIOS for this family, so the OS is
	 * responsible for doing it. If the OS assignment fails, fall
	 * back to the BIOS settings and try to set it up from there.
	 */
	if (boot_cpu_data.x86 == 0x10)
		force_ibs_eilvt_setup();
}

static inline int get_ibs_lvt_offset(void)
{
	u64 val;

	rdmsrl(MSR_AMD64_IBSCTL, val);
	if (!(val & IBSCTL_LVT_OFFSET_VALID))
		return -EINVAL;

	return val & IBSCTL_LVT_OFFSET_MASK;
}

static void setup_APIC_ibs(void)
{
	int offset;

	offset = get_ibs_lvt_offset();
	if (offset < 0)
		goto failed;

	if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
		return;
failed:
	pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
		smp_processor_id());
}

static void clear_APIC_ibs(void)
{
	int offset;

	offset = get_ibs_lvt_offset();
	if (offset >= 0)
		setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}

static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
{
	setup_APIC_ibs();
	return 0;
}

#ifdef CONFIG_PM

static int perf_ibs_suspend(void)
{
	clear_APIC_ibs();
	return 0;
}

static void perf_ibs_resume(void)
{
	ibs_eilvt_setup();
	setup_APIC_ibs();
}

static struct syscore_ops perf_ibs_syscore_ops = {
	.resume		= perf_ibs_resume,
	.suspend	= perf_ibs_suspend,
};

static void perf_ibs_pm_init(void)
{
	register_syscore_ops(&perf_ibs_syscore_ops);
}

#else

static inline void perf_ibs_pm_init(void) { }

#endif

static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
{
	clear_APIC_ibs();
	return 0;
}

static __init int amd_ibs_init(void)
{
	u32 caps;

	caps = __get_ibs_caps();
	if (!caps)
		return -ENODEV;	/* ibs not supported by the cpu */

	ibs_eilvt_setup();

	if (!ibs_eilvt_valid())
		return -EINVAL;

	perf_ibs_pm_init();

	ibs_caps = caps;
	/* make ibs_caps visible to other cpus: */
	smp_mb();
	/*
	 * x86_pmu_amd_ibs_starting_cpu will be called from core on
	 * all online cpus.
	 */
	cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
			  "perf/x86/amd/ibs:starting",
			  x86_pmu_amd_ibs_starting_cpu,
			  x86_pmu_amd_ibs_dying_cpu);

	return perf_event_ibs_init();
}

/* Since we need the pci subsystem to init ibs we can't do this earlier: */
device_initcall(amd_ibs_init);