// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/cpu_device_id.h>
#include <asm/perf_event.h>
#include <asm/msr.h>

#include "../perf_event.h"

/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 *
 * Hardware branch filter (not available on all CPUs)
 */
#define LBR_KERNEL_BIT          0 /* do not capture at ring0 */
#define LBR_USER_BIT            1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT             2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT        3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT        4 /* do not capture indirect calls */
#define LBR_RETURN_BIT          5 /* do not capture near returns */
#define LBR_IND_JMP_BIT         6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT         7 /* do not capture relative jumps */
#define LBR_FAR_BIT             8 /* do not capture far branches */
#define LBR_CALL_STACK_BIT      9 /* enable call stack */

/*
 * Following bit only exists in Linux; we mask it out before writing it to
 * the actual MSR. But it helps the constraint perf code to understand
 * that this is a separate configuration.
 */
#define LBR_NO_INFO_BIT        63 /* don't read LBR_INFO. */

#define LBR_KERNEL      (1 << LBR_KERNEL_BIT)
#define LBR_USER        (1 << LBR_USER_BIT)
#define LBR_JCC         (1 << LBR_JCC_BIT)
#define LBR_REL_CALL    (1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL    (1 << LBR_IND_CALL_BIT)
#define LBR_RETURN      (1 << LBR_RETURN_BIT)
#define LBR_REL_JMP     (1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP     (1 << LBR_IND_JMP_BIT)
#define LBR_FAR         (1 << LBR_FAR_BIT)
#define LBR_CALL_STACK  (1 << LBR_CALL_STACK_BIT)
#define LBR_NO_INFO     (1ULL << LBR_NO_INFO_BIT)

#define LBR_PLM (LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK    0x3ff   /* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP    -1      /* LBR filter not supported */
#define LBR_IGN         0       /* ignored */

#define LBR_ANY          \
        (LBR_JCC        |\
         LBR_REL_CALL   |\
         LBR_IND_CALL   |\
         LBR_RETURN     |\
         LBR_REL_JMP    |\
         LBR_IND_JMP    |\
         LBR_FAR)

#define LBR_FROM_FLAG_MISPRED   BIT_ULL(63)
#define LBR_FROM_FLAG_IN_TX     BIT_ULL(62)
#define LBR_FROM_FLAG_ABORT     BIT_ULL(61)

#define LBR_FROM_SIGNEXT_2MSB   (BIT_ULL(60) | BIT_ULL(59))

/*
 * Intel LBR_CTL bits
 *
 * Hardware branch filter for Arch LBR
 */
#define ARCH_LBR_KERNEL_BIT             1  /* capture at ring0 */
#define ARCH_LBR_USER_BIT               2  /* capture at ring > 0 */
#define ARCH_LBR_CALL_STACK_BIT         3  /* enable call stack */
#define ARCH_LBR_JCC_BIT                16 /* capture conditional branches */
#define ARCH_LBR_REL_JMP_BIT            17 /* capture relative jumps */
#define ARCH_LBR_IND_JMP_BIT            18 /* capture indirect jumps */
#define ARCH_LBR_REL_CALL_BIT           19 /* capture relative calls */
#define ARCH_LBR_IND_CALL_BIT           20 /* capture indirect calls */
#define ARCH_LBR_RETURN_BIT             21 /* capture near returns */
#define ARCH_LBR_OTHER_BRANCH_BIT       22 /* capture other branches */

#define ARCH_LBR_KERNEL         (1ULL << ARCH_LBR_KERNEL_BIT)
#define ARCH_LBR_USER           (1ULL << ARCH_LBR_USER_BIT)
#define ARCH_LBR_CALL_STACK     (1ULL << ARCH_LBR_CALL_STACK_BIT)
#define ARCH_LBR_JCC            (1ULL << ARCH_LBR_JCC_BIT)
#define ARCH_LBR_REL_JMP        (1ULL << ARCH_LBR_REL_JMP_BIT)
#define ARCH_LBR_IND_JMP        (1ULL << ARCH_LBR_IND_JMP_BIT)
#define ARCH_LBR_REL_CALL       (1ULL << ARCH_LBR_REL_CALL_BIT)
#define ARCH_LBR_IND_CALL       (1ULL << ARCH_LBR_IND_CALL_BIT)
#define ARCH_LBR_RETURN         (1ULL << ARCH_LBR_RETURN_BIT)
#define ARCH_LBR_OTHER_BRANCH   (1ULL << ARCH_LBR_OTHER_BRANCH_BIT)

#define ARCH_LBR_ANY             \
        (ARCH_LBR_JCC           |\
         ARCH_LBR_REL_JMP       |\
         ARCH_LBR_IND_JMP       |\
         ARCH_LBR_REL_CALL      |\
         ARCH_LBR_IND_CALL      |\
         ARCH_LBR_RETURN        |\
         ARCH_LBR_OTHER_BRANCH)

#define ARCH_LBR_CTL_MASK       0x7f000e

static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);

static __always_inline bool is_lbr_call_stack_bit_set(u64 config)
{
        if (static_cpu_has(X86_FEATURE_ARCH_LBR))
                return !!(config & ARCH_LBR_CALL_STACK);

        return !!(config & LBR_CALL_STACK);
}

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
 * otherwise it becomes near impossible to get a reliable stack.
 */

static void __intel_pmu_lbr_enable(bool pmi)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 debugctl, lbr_select = 0, orig_debugctl;

        /*
         * No need to unfreeze manually, as v4 can do that as part
         * of the GLOBAL_STATUS ack.
         */
        if (pmi && x86_pmu.version >= 4)
                return;

        /*
         * No need to reprogram LBR_SELECT in a PMI, as it
         * did not change.
         */
        if (cpuc->lbr_sel)
                lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
        if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel)
                wrmsrl(MSR_LBR_SELECT, lbr_select);

        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        orig_debugctl = debugctl;

        if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
                debugctl |= DEBUGCTLMSR_LBR;
        /*
         * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
         * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
         * may cause superfluous increase/decrease of LBR_TOS.
         */
        if (is_lbr_call_stack_bit_set(lbr_select))
                debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
        else
                debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;

        if (orig_debugctl != debugctl)
                wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);

        if (static_cpu_has(X86_FEATURE_ARCH_LBR))
                wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
}

void intel_pmu_lbr_reset_32(void)
{
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++)
                wrmsrl(x86_pmu.lbr_from + i, 0);
}

void intel_pmu_lbr_reset_64(void)
{
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                wrmsrl(x86_pmu.lbr_from + i, 0);
                wrmsrl(x86_pmu.lbr_to + i, 0);
                if (x86_pmu.lbr_has_info)
                        wrmsrl(x86_pmu.lbr_info + i, 0);
        }
}

static void intel_pmu_arch_lbr_reset(void)
{
        /* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */
        wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr);
}

void intel_pmu_lbr_reset(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        x86_pmu.lbr_reset();

        cpuc->last_task_ctx = NULL;
        cpuc->last_log_id = 0;
        if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select)
                wrmsrl(MSR_LBR_SELECT, 0);
}

/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
        u64 tos;

        rdmsrl(x86_pmu.lbr_tos, tos);
        return tos;
}

enum {
        LBR_NONE,
        LBR_VALID,
};

/*
 * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
 * are the TSX flags when TSX is supported, but when TSX is not supported
 * they have no consistent behavior:
 *
 * - For wrmsr(), bits 61:62 are considered part of the sign extension.
 * - For HW updates (branch captures) bits 61:62 are always OFF and are not
 *   part of the sign extension.
 *
 * Therefore, if:
 *
 * 1) LBR format LBR_FORMAT_EIP_FLAGS2
 * 2) CPU has no TSX support enabled
 *
 * ... then any value passed to wrmsr() must be sign extended to 63 bits and any
 * value from rdmsr() must be converted to have a 61-bit sign extension,
 * ignoring the TSX flags.
 */
static inline bool lbr_from_signext_quirk_needed(void)
{
        bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
                           boot_cpu_has(X86_FEATURE_RTM);

        return !tsx_support;
}
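
/*
 * Editorial note -- an illustrative example of the quirk (addresses made up):
 * with TSX disabled, hardware records a kernel branch source such as
 * 0xffffffffa0001234 with bits 62:61 forced to zero, i.e. the MSR holds
 * 0x9fffffffa0001234.  Writing that value back verbatim on a context restore
 * would violate the "sign extended to 63 bits" rule described above, so
 * lbr_from_signext_quirk_wr() below copies bits 60:59 (guaranteed sign bits)
 * into bits 62:61, turning the value back into 0xffffffffa0001234 before the
 * wrmsr().  lbr_from_signext_quirk_rd() does the inverse on reads: it clears
 * bits 62:61 so software always sees the hardware-style encoding.
 */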

static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);

/* If quirk is enabled, ensure sign extension is 63 bits: */
inline u64 lbr_from_signext_quirk_wr(u64 val)
{
        if (static_branch_unlikely(&lbr_from_quirk_key)) {
                /*
                 * Sign extend into bits 61:62 while preserving bit 63.
                 *
                 * Quirk is enabled when TSX is disabled. Therefore TSX bits
                 * in val are always OFF and must be changed to be sign
                 * extension bits. Since bits 59:60 are guaranteed to be
                 * part of the sign extension bits, we can just copy them
                 * to 61:62.
                 */
                val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
        }
        return val;
}

/*
 * If quirk is needed, ensure sign extension is 61 bits:
 */
static u64 lbr_from_signext_quirk_rd(u64 val)
{
        if (static_branch_unlikely(&lbr_from_quirk_key)) {
                /*
                 * Quirk is on when TSX is not enabled. Therefore TSX
                 * flags must be read as OFF.
                 */
                val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
        }
        return val;
}

static __always_inline void wrlbr_from(unsigned int idx, u64 val)
{
        val = lbr_from_signext_quirk_wr(val);
        wrmsrl(x86_pmu.lbr_from + idx, val);
}

static __always_inline void wrlbr_to(unsigned int idx, u64 val)
{
        wrmsrl(x86_pmu.lbr_to + idx, val);
}

static __always_inline void wrlbr_info(unsigned int idx, u64 val)
{
        wrmsrl(x86_pmu.lbr_info + idx, val);
}

static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
{
        u64 val;

        if (lbr)
                return lbr->from;

        rdmsrl(x86_pmu.lbr_from + idx, val);

        return lbr_from_signext_quirk_rd(val);
}

static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
{
        u64 val;

        if (lbr)
                return lbr->to;

        rdmsrl(x86_pmu.lbr_to + idx, val);

        return val;
}

static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
{
        u64 val;

        if (lbr)
                return lbr->info;

        rdmsrl(x86_pmu.lbr_info + idx, val);

        return val;
}

static inline void
wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
{
        wrlbr_from(idx, lbr->from);
        wrlbr_to(idx, lbr->to);
        if (need_info)
                wrlbr_info(idx, lbr->info);
}

static inline bool
rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
{
        u64 from = rdlbr_from(idx, NULL);

        /* Don't read invalid entry */
        if (!from)
                return false;

        lbr->from = from;
        lbr->to = rdlbr_to(idx, NULL);
        if (need_info)
                lbr->info = rdlbr_info(idx, NULL);

        return true;
}
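
/*
 * Editorial note: the rdlbr_*() helpers above read the MSRs when @lbr is
 * NULL and otherwise return fields from an already-captured struct
 * lbr_entry (e.g. from a PEBS record or an XSAVES image).  The save/restore
 * paths below walk the LBR stack as a ring buffer: with, say, 32 entries
 * (mask = 31) and TOS = 5, the most recent entries live at indices
 * 5, 4, 3, 2, 1, 0, 31, 30, ... which is what (tos - i) & mask produces.
 */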

void intel_pmu_lbr_restore(void *ctx)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct x86_perf_task_context *task_ctx = ctx;
        bool need_info = x86_pmu.lbr_has_info;
        u64 tos = task_ctx->tos;
        unsigned lbr_idx, mask;
        int i;

        mask = x86_pmu.lbr_nr - 1;
        for (i = 0; i < task_ctx->valid_lbrs; i++) {
                lbr_idx = (tos - i) & mask;
                wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info);
        }

        for (; i < x86_pmu.lbr_nr; i++) {
                lbr_idx = (tos - i) & mask;
                wrlbr_from(lbr_idx, 0);
                wrlbr_to(lbr_idx, 0);
                if (need_info)
                        wrlbr_info(lbr_idx, 0);
        }

        wrmsrl(x86_pmu.lbr_tos, tos);

        if (cpuc->lbr_select)
                wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
}

static void intel_pmu_arch_lbr_restore(void *ctx)
{
        struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
        struct lbr_entry *entries = task_ctx->entries;
        int i;

        /* Fast reset the LBRs before restore if the call stack is not full. */
        if (!entries[x86_pmu.lbr_nr - 1].from)
                intel_pmu_arch_lbr_reset();

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                if (!entries[i].from)
                        break;
                wrlbr_all(&entries[i], i, true);
        }
}

/*
 * Restore the Architecture LBR state from the xsave area in the perf
 * context data for the task via the XRSTORS instruction.
 */
static void intel_pmu_arch_lbr_xrstors(void *ctx)
{
        struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;

        xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR);
}

static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
{
        if (static_cpu_has(X86_FEATURE_ARCH_LBR))
                return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL);

        return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
}

static void __intel_pmu_lbr_restore(void *ctx)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
            task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
                intel_pmu_lbr_reset();
                return;
        }

        /*
         * Do not restore the LBR registers if:
         * - no one else touched them, and
         * - they were not cleared in a C-state
         */
        if ((ctx == cpuc->last_task_ctx) &&
            (task_context_opt(ctx)->log_id == cpuc->last_log_id) &&
            !lbr_is_reset_in_cstate(ctx)) {
                task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
                return;
        }

        x86_pmu.lbr_restore(ctx);

        task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
}

void intel_pmu_lbr_save(void *ctx)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct x86_perf_task_context *task_ctx = ctx;
        bool need_info = x86_pmu.lbr_has_info;
        unsigned lbr_idx, mask;
        u64 tos;
        int i;

        mask = x86_pmu.lbr_nr - 1;
        tos = intel_pmu_lbr_tos();
        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                lbr_idx = (tos - i) & mask;
                if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info))
                        break;
        }
        task_ctx->valid_lbrs = i;
        task_ctx->tos = tos;

        if (cpuc->lbr_select)
                rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
}

static void intel_pmu_arch_lbr_save(void *ctx)
{
        struct x86_perf_task_context_arch_lbr *task_ctx = ctx;
        struct lbr_entry *entries = task_ctx->entries;
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                if (!rdlbr_all(&entries[i], i, true))
                        break;
        }

        /* LBR call stack is not full. Reset is required in restore. */
        if (i < x86_pmu.lbr_nr)
                entries[x86_pmu.lbr_nr - 1].from = 0;
}
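
/*
 * Editorial note: the zeroed last entry written above is a handshake with
 * intel_pmu_arch_lbr_restore().  A cleared 'from' in the deepest slot tells
 * the restore side that the saved call stack was not full, so it can clear
 * the stale tail entries with the cheap whole-array reset through the
 * MSR_ARCH_LBR_DEPTH write instead of zeroing them one by one.
 */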

/*
 * Save the Architecture LBR state to the xsave area in the perf
 * context data for the task via the XSAVES instruction.
 */
static void intel_pmu_arch_lbr_xsaves(void *ctx)
{
        struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;

        xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR);
}

static void __intel_pmu_lbr_save(void *ctx)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (task_context_opt(ctx)->lbr_callstack_users == 0) {
                task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
                return;
        }

        x86_pmu.lbr_save(ctx);

        task_context_opt(ctx)->lbr_stack_state = LBR_VALID;

        cpuc->last_task_ctx = ctx;
        cpuc->last_log_id = ++task_context_opt(ctx)->log_id;
}

void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
                                 struct perf_event_pmu_context *next_epc)
{
        void *prev_ctx_data, *next_ctx_data;

        swap(prev_epc->task_ctx_data, next_epc->task_ctx_data);

        /*
         * Architecture specific synchronization makes sense in case
         * both prev_epc->task_ctx_data and next_epc->task_ctx_data
         * pointers are allocated.
         */

        prev_ctx_data = next_epc->task_ctx_data;
        next_ctx_data = prev_epc->task_ctx_data;

        if (!prev_ctx_data || !next_ctx_data)
                return;

        swap(task_context_opt(prev_ctx_data)->lbr_callstack_users,
             task_context_opt(next_ctx_data)->lbr_callstack_users);
}

void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        void *task_ctx;

        if (!cpuc->lbr_users)
                return;

        /*
         * If LBR callstack feature is enabled and the stack was saved when
         * the task was scheduled out, restore the stack. Otherwise flush
         * the LBR stack.
         */
        task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL;
        if (task_ctx) {
                if (sched_in)
                        __intel_pmu_lbr_restore(task_ctx);
                else
                        __intel_pmu_lbr_save(task_ctx);
                return;
        }

        /*
         * Since a context switch can flip the address space and LBR entries
         * are not tagged with an identifier, we need to wipe the LBR, even for
         * per-cpu events. You simply cannot resolve the branches from the old
         * address space.
         */
        if (sched_in)
                intel_pmu_lbr_reset();
}

static inline bool branch_user_callstack(unsigned br_sel)
{
        return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
}

void intel_pmu_lbr_add(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
                cpuc->lbr_select = 1;

        cpuc->br_sel = event->hw.branch_reg.reg;

        if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data)
                task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++;

        /*
         * Request pmu::sched_task() callback, which will fire inside the
         * regular perf event scheduling, so that call will:
         *
         *  - restore or wipe; when LBR-callstack,
         *  - wipe; otherwise,
         *
         * when this is from __perf_event_task_sched_in().
         *
         * However, if this is from perf_install_in_context(), no such callback
         * will follow and we'll need to reset the LBR here if this is the
         * first LBR event.
         *
         * The problem is, we cannot tell these cases apart... but we can
         * exclude the biggest chunk of cases by looking at
         * event->total_time_running. An event that has accrued runtime cannot
         * be 'new'. Conversely, a new event can get installed through the
         * context switch path for the first time.
         */
        if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
                cpuc->lbr_pebs_users++;
        perf_sched_cb_inc(event->pmu);
        if (!cpuc->lbr_users++ && !event->total_time_running)
                intel_pmu_lbr_reset();
}
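
/*
 * Editorial note: cpuc->lbr_users counts every active LBR consumer on this
 * CPU, while cpuc->lbr_pebs_users counts the subset whose LBR data arrives
 * via adaptive PEBS records.  intel_pmu_lbr_read() below compares the two
 * and skips the MSR reads entirely when they are equal, since in that case
 * the PEBS path already delivers the branch stack.
 */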

void release_lbr_buffers(void)
{
        struct kmem_cache *kmem_cache;
        struct cpu_hw_events *cpuc;
        int cpu;

        if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
                return;

        for_each_possible_cpu(cpu) {
                cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
                kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
                if (kmem_cache && cpuc->lbr_xsave) {
                        kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
                        cpuc->lbr_xsave = NULL;
                }
        }
}

void reserve_lbr_buffers(void)
{
        struct kmem_cache *kmem_cache;
        struct cpu_hw_events *cpuc;
        int cpu;

        if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
                return;

        for_each_possible_cpu(cpu) {
                cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
                kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
                if (!kmem_cache || cpuc->lbr_xsave)
                        continue;

                cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
                                                        GFP_KERNEL | __GFP_ZERO,
                                                        cpu_to_node(cpu));
        }
}

void intel_pmu_lbr_del(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        if (branch_user_callstack(cpuc->br_sel) &&
            event->pmu_ctx->task_ctx_data)
                task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--;

        if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
                cpuc->lbr_select = 0;

        if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
                cpuc->lbr_pebs_users--;
        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);
        WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
        perf_sched_cb_dec(event->pmu);

        /*
         * The logged occurrences information is only valid for the
         * current LBR group. If another LBR group is scheduled in
         * later, the information from the stale LBRs will be wrongly
         * interpreted. Reset the LBRs here.
         *
         * Only clear once for a branch counter group with the leader
         * event. Because
         * - The LBRs cannot simply be reset on !cpuc->lbr_users, because
         *   it's possible that the last LBR user is not in a branch
         *   counter group, e.g., a branch_counters group + several
         *   normal LBR events.
         * - The LBR reset can be done with any one of the events in a
         *   branch counter group, since they are always scheduled together.
         *   It's easy to force the leader event to be an LBR event.
         */
        if (is_branch_counters_group(event) && event == event->group_leader)
                intel_pmu_lbr_reset();
}
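
/*
 * Editorial note: INTEL_PMC_IDX_FIXED_VLBR is the pseudo counter used to
 * represent LBRs that have been handed to a guest (presumably via the KVM
 * vPMU).  When that bit is set in cpuc->intel_ctrl_guest_mask, the helper
 * below reports true and the host-side enable/disable/read paths leave the
 * LBR MSRs alone.
 */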

static inline bool vlbr_exclude_host(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        return test_bit(INTEL_PMC_IDX_FIXED_VLBR,
                        (unsigned long *)&cpuc->intel_ctrl_guest_mask);
}

void intel_pmu_lbr_enable_all(bool pmi)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (cpuc->lbr_users && !vlbr_exclude_host())
                __intel_pmu_lbr_enable(pmi);
}

void intel_pmu_lbr_disable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (cpuc->lbr_users && !vlbr_exclude_host()) {
                if (static_cpu_has(X86_FEATURE_ARCH_LBR))
                        return __intel_pmu_arch_lbr_disable();

                __intel_pmu_lbr_disable();
        }
}

void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
        unsigned long mask = x86_pmu.lbr_nr - 1;
        struct perf_branch_entry *br = cpuc->lbr_entries;
        u64 tos = intel_pmu_lbr_tos();
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                unsigned long lbr_idx = (tos - i) & mask;
                union {
                        struct {
                                u32 from;
                                u32 to;
                        };
                        u64 lbr;
                } msr_lastbranch;

                rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

                perf_clear_branch_entry_bitfields(br);

                br->from = msr_lastbranch.from;
                br->to   = msr_lastbranch.to;
                br++;
        }
        cpuc->lbr_stack.nr = i;
        cpuc->lbr_stack.hw_idx = tos;
}

/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
        bool need_info = false, call_stack = false;
        unsigned long mask = x86_pmu.lbr_nr - 1;
        struct perf_branch_entry *br = cpuc->lbr_entries;
        u64 tos = intel_pmu_lbr_tos();
        int i;
        int out = 0;
        int num = x86_pmu.lbr_nr;

        if (cpuc->lbr_sel) {
                need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
                if (cpuc->lbr_sel->config & LBR_CALL_STACK)
                        call_stack = true;
        }

        for (i = 0; i < num; i++) {
                unsigned long lbr_idx = (tos - i) & mask;
                u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
                u16 cycles = 0;

                from = rdlbr_from(lbr_idx, NULL);
                to   = rdlbr_to(lbr_idx, NULL);

                /*
                 * Read LBR call stack entries
                 * until invalid entry (0s) is detected.
                 */
                if (call_stack && !from)
                        break;

                if (x86_pmu.lbr_has_info) {
                        if (need_info) {
                                u64 info;

                                info = rdlbr_info(lbr_idx, NULL);
                                mis = !!(info & LBR_INFO_MISPRED);
                                pred = !mis;
                                cycles = (info & LBR_INFO_CYCLES);
                                if (x86_pmu.lbr_has_tsx) {
                                        in_tx = !!(info & LBR_INFO_IN_TX);
                                        abort = !!(info & LBR_INFO_ABORT);
                                }
                        }
                } else {
                        int skip = 0;

                        if (x86_pmu.lbr_from_flags) {
                                mis = !!(from & LBR_FROM_FLAG_MISPRED);
                                pred = !mis;
                                skip = 1;
                        }
                        if (x86_pmu.lbr_has_tsx) {
                                in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
                                abort = !!(from & LBR_FROM_FLAG_ABORT);
                                skip = 3;
                        }
                        from = (u64)((((s64)from) << skip) >> skip);

                        if (x86_pmu.lbr_to_cycles) {
                                cycles = ((to >> 48) & LBR_INFO_CYCLES);
                                to = (u64)((((s64)to) << 16) >> 16);
                        }
                }

                /*
                 * Some CPUs report duplicated abort records,
                 * with the second entry not having an abort bit set.
                 * Skip them here. This loop runs backwards,
                 * so we need to undo the previous record.
                 * If the abort just happened outside the window,
                 * the extra entry cannot be removed.
                 */
                if (abort && x86_pmu.lbr_double_abort && out > 0)
                        out--;

                perf_clear_branch_entry_bitfields(br+out);
                br[out].from = from;
                br[out].to = to;
                br[out].mispred = mis;
                br[out].predicted = pred;
                br[out].in_tx = in_tx;
                br[out].abort = abort;
                br[out].cycles = cycles;
                out++;
        }
        cpuc->lbr_stack.nr = out;
        cpuc->lbr_stack.hw_idx = tos;
}
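
/*
 * Editorial note -- an illustrative example of the 'skip' arithmetic above
 * (value made up): on a pre-LBR_INFO part with TSX flags in the FROM MSR,
 * skip is 3, so a raw value of 0xa0007f1234567890 (bit 63 = mispredict,
 * bit 61 = abort) becomes ((s64)val << 3) >> 3 = 0x00007f1234567890, i.e.
 * the flag bits are replaced by copies of bit 60 and the user-space branch
 * source address is recovered.  The lbr_to_cycles path does the same with a
 * 16-bit shift to strip the cycle count stored in the top of the TO MSR.
 */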

static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);

static __always_inline int get_lbr_br_type(u64 info)
{
        int type = 0;

        if (static_branch_likely(&x86_lbr_type))
                type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;

        return type;
}

static __always_inline bool get_lbr_mispred(u64 info)
{
        bool mispred = 0;

        if (static_branch_likely(&x86_lbr_mispred))
                mispred = !!(info & LBR_INFO_MISPRED);

        return mispred;
}

static __always_inline u16 get_lbr_cycles(u64 info)
{
        u16 cycles = info & LBR_INFO_CYCLES;

        if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
            (!static_branch_likely(&x86_lbr_cycles) ||
             !(info & LBR_INFO_CYC_CNT_VALID)))
                cycles = 0;

        return cycles;
}

static_assert((64 - PERF_BRANCH_ENTRY_INFO_BITS_MAX) > LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS);

static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
                                struct lbr_entry *entries)
{
        struct perf_branch_entry *e;
        struct lbr_entry *lbr;
        u64 from, to, info;
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                lbr = entries ? &entries[i] : NULL;
                e = &cpuc->lbr_entries[i];

                from = rdlbr_from(i, lbr);
                /*
                 * Read LBR entries until invalid entry (0s) is detected.
                 */
                if (!from)
                        break;

                to = rdlbr_to(i, lbr);
                info = rdlbr_info(i, lbr);

                perf_clear_branch_entry_bitfields(e);

                e->from = from;
                e->to = to;
                e->mispred = get_lbr_mispred(info);
                e->predicted = !e->mispred;
                e->in_tx = !!(info & LBR_INFO_IN_TX);
                e->abort = !!(info & LBR_INFO_ABORT);
                e->cycles = get_lbr_cycles(info);
                e->type = get_lbr_br_type(info);

                /*
                 * Leverage the reserved field of cpuc->lbr_entries[i] to
                 * temporarily store the branch counters information.
                 * The later code will decide what content can be disclosed
                 * to the perf tool. Please see intel_pmu_lbr_counters_reorder().
                 */
                e->reserved = (info >> LBR_INFO_BR_CNTR_OFFSET) & LBR_INFO_BR_CNTR_FULL_MASK;
        }

        cpuc->lbr_stack.nr = i;
}
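
/*
 * Editorial note: in LBR_INFO the per-branch counter snapshots are packed by
 * hardware counter index, LBR_INFO_BR_CNTR_BITS bits each (the static_assert
 * above guarantees the stashed copy fits in the perf_branch_entry reserved
 * field).  The reorder step below repacks them in the order the group's
 * events were enabled; e.g. if the counting events sit on GP counters 2 and
 * 0, enabled in that order, the reordered value holds counter 2's snapshot
 * in the lowest LBR_INFO_BR_CNTR_BITS bits and counter 0's in the next ones.
 */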

/*
 * The enabled order may be different from the counter order.
 * Update the lbr_counters with the enabled order.
 */
static void intel_pmu_lbr_counters_reorder(struct cpu_hw_events *cpuc,
                                           struct perf_event *event)
{
        int i, j, pos = 0, order[X86_PMC_IDX_MAX];
        struct perf_event *leader, *sibling;
        u64 src, dst, cnt;

        leader = event->group_leader;
        if (branch_sample_counters(leader))
                order[pos++] = leader->hw.idx;

        for_each_sibling_event(sibling, leader) {
                if (!branch_sample_counters(sibling))
                        continue;
                order[pos++] = sibling->hw.idx;
        }

        WARN_ON_ONCE(!pos);

        for (i = 0; i < cpuc->lbr_stack.nr; i++) {
                src = cpuc->lbr_entries[i].reserved;
                dst = 0;
                for (j = 0; j < pos; j++) {
                        cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK;
                        dst |= cnt << j * LBR_INFO_BR_CNTR_BITS;
                }
                cpuc->lbr_counters[i] = dst;
                cpuc->lbr_entries[i].reserved = 0;
        }
}

void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
                                struct cpu_hw_events *cpuc,
                                struct perf_event *event)
{
        if (is_branch_counters_group(event)) {
                intel_pmu_lbr_counters_reorder(cpuc, event);
                perf_sample_save_brstack(data, event, &cpuc->lbr_stack, cpuc->lbr_counters);
                return;
        }

        perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}

static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
{
        intel_pmu_store_lbr(cpuc, NULL);
}

static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
{
        struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;

        if (!xsave) {
                intel_pmu_store_lbr(cpuc, NULL);
                return;
        }
        xsaves(&xsave->xsave, XFEATURE_MASK_LBR);

        intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
}

void intel_pmu_lbr_read(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        /*
         * Don't read when all LBR users are using adaptive PEBS.
         *
         * This could be smarter and actually check the event,
         * but this simple approach seems to work for now.
         */
        if (!cpuc->lbr_users || vlbr_exclude_host() ||
            cpuc->lbr_users == cpuc->lbr_pebs_users)
                return;

        x86_pmu.lbr_read(cpuc);

        intel_pmu_lbr_filter(cpuc);
}
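
/*
 * Editorial note -- an example of how the two filter stages cooperate
 * (illustrative): PERF_SAMPLE_BRANCH_ANY_RETURN on a Nehalem-class PMU
 * programs LBR_RETURN | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR into LBR_SELECT
 * (see nhm_lbr_sel_map below; LBR_FAR is what catches sysret/iret), while
 * the software mask built in intel_pmu_setup_sw_lbr_filter() keeps only
 * X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET, so intel_pmu_lbr_filter()
 * discards the jumps and other far branches the hardware filter could not
 * exclude.
 */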

/*
 * SW filter is used:
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */
static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
        u64 br_type = event->attr.branch_sample_type;
        int mask = 0;

        if (br_type & PERF_SAMPLE_BRANCH_USER)
                mask |= X86_BR_USER;

        if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
                mask |= X86_BR_KERNEL;

        /* we ignore BRANCH_HV here */

        if (br_type & PERF_SAMPLE_BRANCH_ANY)
                mask |= X86_BR_ANY;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
                mask |= X86_BR_ANY_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
                mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

        if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
                mask |= X86_BR_IND_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
                mask |= X86_BR_ABORT;

        if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
                mask |= X86_BR_IN_TX;

        if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
                mask |= X86_BR_NO_TX;

        if (br_type & PERF_SAMPLE_BRANCH_COND)
                mask |= X86_BR_JCC;

        if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
                if (!x86_pmu_has_lbr_callstack())
                        return -EOPNOTSUPP;
                if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
                        return -EINVAL;
                mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
                        X86_BR_CALL_STACK;
        }

        if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
                mask |= X86_BR_IND_JMP;

        if (br_type & PERF_SAMPLE_BRANCH_CALL)
                mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
                mask |= X86_BR_TYPE_SAVE;

        /*
         * stash actual user request into reg; it may
         * be used by fixup code for some CPU
         */
        event->hw.branch_reg.reg = mask;
        return 0;
}

/*
 * setup the HW LBR filter
 * Used only when available, may not be enough to disambiguate
 * all branches, may need the help of the SW filter
 */
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
        struct hw_perf_event_extra *reg;
        u64 br_type = event->attr.branch_sample_type;
        u64 mask = 0, v;
        int i;

        for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
                if (!(br_type & (1ULL << i)))
                        continue;

                v = x86_pmu.lbr_sel_map[i];
                if (v == LBR_NOT_SUPP)
                        return -EOPNOTSUPP;

                if (v != LBR_IGN)
                        mask |= v;
        }

        reg = &event->hw.branch_reg;
        reg->idx = EXTRA_REG_LBR;

        if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
                reg->config = mask;

                /*
                 * The Arch LBR HW can retrieve the common branch types
                 * from the LBR_INFO. It doesn't require the high overhead
                 * SW disassembly.
                 * Enable the branch type by default for the Arch LBR.
                 */
                reg->reg |= X86_BR_TYPE_SAVE;
                return 0;
        }

        /*
         * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
         * in suppress mode. So LBR_SELECT should be set to
         * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
         * But the 10th bit LBR_CALL_STACK does not operate
         * in suppress mode.
         */
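        /*
         * Editorial note -- a worked example of the XOR below (illustrative):
         * PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY_CALL on a Sandy
         * Bridge class PMU gives mask = LBR_USER | LBR_REL_CALL |
         * LBR_IND_CALL | LBR_FAR = 0x11a.  With x86_pmu.lbr_sel_mask =
         * LBR_SEL_MASK = 0x3ff and LBR_CALL_STACK = 0x200, the XOR against
         * 0x1ff yields 0x0e5: the "do not capture" bits for ring 0, Jcc,
         * returns and jumps are set, while the bits for ring > 0, calls and
         * far branches stay clear, which is exactly the suppress-mode
         * encoding LBR_SELECT expects.
         */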
        reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);

        if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
            (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
            x86_pmu.lbr_has_info)
                reg->config |= LBR_NO_INFO;

        return 0;
}

int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
        int ret = 0;

        /*
         * no LBR on this PMU
         */
        if (!x86_pmu.lbr_nr)
                return -EOPNOTSUPP;

        /*
         * setup SW LBR filter
         */
        ret = intel_pmu_setup_sw_lbr_filter(event);
        if (ret)
                return ret;

        /*
         * setup HW LBR filter, if any
         */
        if (x86_pmu.lbr_sel_map)
                ret = intel_pmu_setup_hw_lbr_filter(event);

        return ret;
}

enum {
        ARCH_LBR_BR_TYPE_JCC                    = 0,
        ARCH_LBR_BR_TYPE_NEAR_IND_JMP           = 1,
        ARCH_LBR_BR_TYPE_NEAR_REL_JMP           = 2,
        ARCH_LBR_BR_TYPE_NEAR_IND_CALL          = 3,
        ARCH_LBR_BR_TYPE_NEAR_REL_CALL          = 4,
        ARCH_LBR_BR_TYPE_NEAR_RET               = 5,
        ARCH_LBR_BR_TYPE_KNOWN_MAX              = ARCH_LBR_BR_TYPE_NEAR_RET,

        ARCH_LBR_BR_TYPE_MAP_MAX                = 16,
};

static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = {
        [ARCH_LBR_BR_TYPE_JCC]                  = X86_BR_JCC,
        [ARCH_LBR_BR_TYPE_NEAR_IND_JMP]         = X86_BR_IND_JMP,
        [ARCH_LBR_BR_TYPE_NEAR_REL_JMP]         = X86_BR_JMP,
        [ARCH_LBR_BR_TYPE_NEAR_IND_CALL]        = X86_BR_IND_CALL,
        [ARCH_LBR_BR_TYPE_NEAR_REL_CALL]        = X86_BR_CALL,
        [ARCH_LBR_BR_TYPE_NEAR_RET]             = X86_BR_RET,
};

/*
 * implement actual branch filter based on user demand.
 * Hardware may not exactly satisfy that request, thus
 * we need to inspect opcodes. Mismatched branches are
 * discarded. Therefore, the number of branches returned
 * in PERF_SAMPLE_BRANCH_STACK sample may vary.
 */
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
        u64 from, to;
        int br_sel = cpuc->br_sel;
        int i, j, type, to_plm;
        bool compress = false;

        /* if sampling all branches, then nothing to filter */
        if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
            ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
                return;

        for (i = 0; i < cpuc->lbr_stack.nr; i++) {

                from = cpuc->lbr_entries[i].from;
                to = cpuc->lbr_entries[i].to;
                type = cpuc->lbr_entries[i].type;

                /*
                 * Parse the branch type recorded in LBR_x_INFO MSR.
                 * Doesn't support OTHER_BRANCH decoding for now.
                 * The OTHER_BRANCH branch type still relies on software decoding.
                 */
                if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
                    type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) {
                        to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
                        type = arch_lbr_br_type_map[type] | to_plm;
                } else
                        type = branch_type(from, to, cpuc->lbr_entries[i].abort);
                if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
                        if (cpuc->lbr_entries[i].in_tx)
                                type |= X86_BR_IN_TX;
                        else
                                type |= X86_BR_NO_TX;
                }

                /* if type does not correspond, then discard */
                if (type == X86_BR_NONE || (br_sel & type) != type) {
                        cpuc->lbr_entries[i].from = 0;
                        compress = true;
                }

                if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
                        cpuc->lbr_entries[i].type = common_branch_type(type);
        }

        if (!compress)
                return;

        /* remove all entries with from=0 */
        for (i = 0; i < cpuc->lbr_stack.nr; ) {
                if (!cpuc->lbr_entries[i].from) {
                        j = i;
                        while (++j < cpuc->lbr_stack.nr) {
                                cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
                                cpuc->lbr_counters[j-1] = cpuc->lbr_counters[j];
                        }
                        cpuc->lbr_stack.nr--;
                        if (!cpuc->lbr_entries[i].from)
                                continue;
                }
                i++;
        }
}

void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        /* Cannot get TOS for large PEBS and Arch LBR */
        if (static_cpu_has(X86_FEATURE_ARCH_LBR) ||
            (cpuc->n_pebs == cpuc->n_large_pebs))
                cpuc->lbr_stack.hw_idx = -1ULL;
        else
                cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();

        intel_pmu_store_lbr(cpuc, lbr);
        intel_pmu_lbr_filter(cpuc);
}

/*
 * Map interface branch filters onto LBR filters
 */
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_REL_JMP
                                                | LBR_IND_JMP | LBR_FAR,
        /*
         * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
         */
        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
         LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
        /*
         * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
         */
        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL | LBR_IND_JMP,
        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
};

static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
                                                | LBR_FAR,
        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
        [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
};

static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
                                                | LBR_FAR,
        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
        [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_REL_CALL | LBR_IND_CALL
                                                | LBR_RETURN | LBR_CALL_STACK,
        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
        [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
};

static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = ARCH_LBR_ANY,
        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = ARCH_LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = ARCH_LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = ARCH_LBR_RETURN |
                                                  ARCH_LBR_OTHER_BRANCH,
        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = ARCH_LBR_REL_CALL |
                                                  ARCH_LBR_IND_CALL |
                                                  ARCH_LBR_OTHER_BRANCH,
        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = ARCH_LBR_IND_CALL,
        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = ARCH_LBR_JCC,
        [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = ARCH_LBR_REL_CALL |
                                                  ARCH_LBR_IND_CALL |
                                                  ARCH_LBR_RETURN |
                                                  ARCH_LBR_CALL_STACK,
        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = ARCH_LBR_IND_JMP,
        [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = ARCH_LBR_REL_CALL,
};

/* core */
void __init intel_pmu_lbr_init_core(void)
{
        x86_pmu.lbr_nr   = 4;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
        x86_pmu.lbr_to   = MSR_LBR_CORE_TO;

        /*
         * SW branch filter usage:
         * - compensate for lack of HW filter
         */
}

/* nehalem/westmere */
void __init intel_pmu_lbr_init_nhm(void)
{
        x86_pmu.lbr_nr   = 16;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

        /*
         * SW branch filter usage:
         * - workaround LBR_SEL errata (see above)
         * - support syscall, sysret capture.
         *   That requires LBR_FAR but that means far
         *   jmps need to be filtered out
         */
}

/* sandy bridge */
void __init intel_pmu_lbr_init_snb(void)
{
        x86_pmu.lbr_nr   = 16;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

        /*
         * SW branch filter usage:
         * - support syscall, sysret capture.
         *   That requires LBR_FAR but that means far
         *   jmps need to be filtered out
         */
}

static inline struct kmem_cache *
create_lbr_kmem_cache(size_t size, size_t align)
{
        return kmem_cache_create("x86_lbr", size, align, 0, NULL);
}

/* haswell */
void intel_pmu_lbr_init_hsw(void)
{
        size_t size = sizeof(struct x86_perf_task_context);

        x86_pmu.lbr_nr   = 16;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

        x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
}

/* skylake */
__init void intel_pmu_lbr_init_skl(void)
{
        size_t size = sizeof(struct x86_perf_task_context);

        x86_pmu.lbr_nr   = 32;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
        x86_pmu.lbr_info = MSR_LBR_INFO_0;

        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;

        x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);

        /*
         * SW branch filter usage:
         * - support syscall, sysret capture.
         *   That requires LBR_FAR but that means far
         *   jmps need to be filtered out
         */
}

/* atom */
void __init intel_pmu_lbr_init_atom(void)
{
        /*
         * only models starting at stepping 10 seem
         * to have an operational LBR which can freeze
         * on PMU interrupt
         */
        if (boot_cpu_data.x86_vfm == INTEL_ATOM_BONNELL
            && boot_cpu_data.x86_stepping < 10) {
                pr_cont("LBR disabled due to erratum");
                return;
        }

        x86_pmu.lbr_nr   = 8;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
        x86_pmu.lbr_to   = MSR_LBR_CORE_TO;

        /*
         * SW branch filter usage:
         * - compensate for lack of HW filter
         */
}

/* slm */
void __init intel_pmu_lbr_init_slm(void)
{
        x86_pmu.lbr_nr   = 8;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
        x86_pmu.lbr_to   = MSR_LBR_CORE_TO;

        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

        /*
         * SW branch filter usage:
         * - compensate for lack of HW filter
         */
        pr_cont("8-deep LBR, ");
}

/* Knights Landing */
void intel_pmu_lbr_init_knl(void)
{
        x86_pmu.lbr_nr   = 8;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

        /* Knights Landing does have MISPREDICT bit */
        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
                x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}

void intel_pmu_lbr_init(void)
{
        switch (x86_pmu.intel_cap.lbr_format) {
        case LBR_FORMAT_EIP_FLAGS2:
                x86_pmu.lbr_has_tsx = 1;
                x86_pmu.lbr_from_flags = 1;
                if (lbr_from_signext_quirk_needed())
                        static_branch_enable(&lbr_from_quirk_key);
                break;

        case LBR_FORMAT_EIP_FLAGS:
                x86_pmu.lbr_from_flags = 1;
                break;

        case LBR_FORMAT_INFO:
                x86_pmu.lbr_has_tsx = 1;
                fallthrough;
        case LBR_FORMAT_INFO2:
                x86_pmu.lbr_has_info = 1;
                break;

        case LBR_FORMAT_TIME:
                x86_pmu.lbr_from_flags = 1;
                x86_pmu.lbr_to_cycles = 1;
                break;
        }

        if (x86_pmu.lbr_has_info) {
                /*
                 * Only used in combination with baseline pebs.
                 */
                static_branch_enable(&x86_lbr_mispred);
                static_branch_enable(&x86_lbr_cycles);
        }
}
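
/*
 * Editorial note: the switch above is effectively a capability table for the
 * legacy (model-specific) LBR formats reported by the PMU:
 * LBR_FORMAT_EIP_FLAGS2 means the flags live in the FROM MSR and TSX bits
 * exist (plus the sign-extension quirk when TSX is fused off),
 * LBR_FORMAT_EIP_FLAGS means flags only, LBR_FORMAT_INFO/INFO2 mean a
 * separate LBR_INFO MSR, and LBR_FORMAT_TIME means cycle counts are stored
 * in the top of the TO MSR.
 */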

/*
 * LBR state size is variable based on the max number of registers.
 * This calculates the expected state size, which should match
 * what the hardware enumerates for the size of XFEATURE_LBR.
 */
static inline unsigned int get_lbr_state_size(void)
{
        return sizeof(struct arch_lbr_state) +
               x86_pmu.lbr_nr * sizeof(struct lbr_entry);
}

static bool is_arch_lbr_xsave_available(void)
{
        if (!boot_cpu_has(X86_FEATURE_XSAVES))
                return false;

        /*
         * Check the LBR state with the corresponding software structure.
         * Disable LBR XSAVES support if the size doesn't match.
         */
        if (xfeature_size(XFEATURE_LBR) == 0)
                return false;

        if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
                return false;

        return true;
}

void __init intel_pmu_arch_lbr_init(void)
{
        struct pmu *pmu = x86_get_pmu(smp_processor_id());
        union cpuid28_eax eax;
        union cpuid28_ebx ebx;
        union cpuid28_ecx ecx;
        unsigned int unused_edx;
        bool arch_lbr_xsave;
        size_t size;
        u64 lbr_nr;

        /* Arch LBR Capabilities */
        cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx);

        lbr_nr = fls(eax.split.lbr_depth_mask) * 8;
        if (!lbr_nr)
                goto clear_arch_lbr;

        /* Apply the max depth of Arch LBR */
        if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr))
                goto clear_arch_lbr;

        x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask;
        x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset;
        x86_pmu.lbr_lip = eax.split.lbr_lip;
        x86_pmu.lbr_cpl = ebx.split.lbr_cpl;
        x86_pmu.lbr_filter = ebx.split.lbr_filter;
        x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack;
        x86_pmu.lbr_mispred = ecx.split.lbr_mispred;
        x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr;
        x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
        x86_pmu.lbr_counters = ecx.split.lbr_counters;
        x86_pmu.lbr_nr = lbr_nr;

        if (!!x86_pmu.lbr_counters)
                x86_pmu.flags |= PMU_FL_BR_CNTR;

        if (x86_pmu.lbr_mispred)
                static_branch_enable(&x86_lbr_mispred);
        if (x86_pmu.lbr_timed_lbr)
                static_branch_enable(&x86_lbr_cycles);
        if (x86_pmu.lbr_br_type)
                static_branch_enable(&x86_lbr_type);

        arch_lbr_xsave = is_arch_lbr_xsave_available();
        if (arch_lbr_xsave) {
                size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
                       get_lbr_state_size();
                pmu->task_ctx_cache = create_lbr_kmem_cache(size,
                                                            XSAVE_ALIGNMENT);
        }

        if (!pmu->task_ctx_cache) {
                arch_lbr_xsave = false;

                size = sizeof(struct x86_perf_task_context_arch_lbr) +
                       lbr_nr * sizeof(struct lbr_entry);
                pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
        }

        x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
        x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
        x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0;

        /* LBR callstack requires both CPL and Branch Filtering support */
        if (!x86_pmu.lbr_cpl ||
            !x86_pmu.lbr_filter ||
            !x86_pmu.lbr_call_stack)
                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP;

        if (!x86_pmu.lbr_cpl) {
                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP;
                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP;
        } else if (!x86_pmu.lbr_filter) {
                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP;
                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP;
                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP;
                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP;
                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP;
                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP;
                arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP;
        }

        x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK;
        x86_pmu.lbr_ctl_map  = arch_lbr_ctl_map;

        if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter)
                x86_pmu.lbr_ctl_map = NULL;

        x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
        if (arch_lbr_xsave) {
                x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
                x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
                x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave;
                pr_cont("XSAVE ");
        } else {
                x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
                x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
                x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
        }

        pr_cont("Architectural LBR, ");

        return;

clear_arch_lbr:
        setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR);
}

/**
 * x86_perf_get_lbr - get the LBR records information
 *
 * @lbr: the caller's memory to store the LBR records information
 */
void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
{
        lbr->nr = x86_pmu.lbr_nr;
        lbr->from = x86_pmu.lbr_from;
        lbr->to = x86_pmu.lbr_to;
        lbr->info = x86_pmu.lbr_info;
        lbr->has_callstack = x86_pmu_has_lbr_callstack();
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);

struct event_constraint vlbr_constraint =
        __EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR),
                          FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT);