// SPDX-License-Identifier: GPL-2.0
/*
 * Implement support for AMD Fam19h Branch Sampling feature
 * Based on specifications published in AMD PPR Fam19 Model 01
 *
 * Copyright 2021 Google LLC
 * Contributed by Stephane Eranian <eranian@google.com>
 */
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>

#include "../perf_event.h"

#define BRS_POISON	0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */

/* Debug Extension Configuration register layout */
union amd_debug_extn_cfg {
	__u64 val;
	struct {
		__u64	rsvd0:2,  /* reserved */
			brsmen:1, /* branch sample enable */
			rsvd4_3:2,/* reserved - must be 0x3 */
			vb:1,	  /* valid branches recorded */
			rsvd2:10, /* reserved */
			msroff:4, /* index of next entry to write */
			rsvd3:4,  /* reserved */
			pmc:3,	  /* #PMC holding the sampling event */
			rsvd4:37; /* reserved */
	};
};

static inline unsigned int brs_from(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx;
}

static inline unsigned int brs_to(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
}

static __always_inline void set_debug_extn_cfg(u64 val)
{
	/* bits[4:3] must always be set to 11b */
	__wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32);
}

static __always_inline u64 get_debug_extn_cfg(void)
{
	return __rdmsr(MSR_AMD_DBG_EXTN_CFG);
}
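
/*
 * Illustration (not relied on by the code in this file): the FROM/TO MSRs
 * are interleaved, so entry 3 lives in brs_from(3) == MSR_AMD_SAMP_BR_FROM + 6
 * and brs_to(3) == MSR_AMD_SAMP_BR_FROM + 7. With the field layout above, a
 * hypothetical cfg value with brsmen == 1 and msroff == 5 decodes to
 * (1 << 2) | (5 << 16) == 0x50004, and since set_debug_extn_cfg() always ORs
 * in the mandatory bits[4:3], even set_debug_extn_cfg(0) writes 0x18 to
 * MSR_AMD_DBG_EXTN_CFG.
 */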

static bool __init amd_brs_detect(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return false;

	switch (boot_cpu_data.x86) {
	case 0x19: /* AMD Fam19h (Zen3) */
		x86_pmu.lbr_nr = 16;

		/* No hardware filtering supported */
		x86_pmu.lbr_sel_map = NULL;
		x86_pmu.lbr_sel_mask = 0;
		break;
	default:
		return false;
	}

	return true;
}

/*
 * Current BRS implementation does not support branch type or privilege level
 * filtering. Therefore, this function simply enforces these limitations. No need
 * for a br_sel_map. Software filtering is not supported because it would not
 * correlate well with a sampling period.
 */
static int amd_brs_setup_filter(struct perf_event *event)
{
	u64 type = event->attr.branch_sample_type;

	/* No BRS support */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/* Can only capture all branches, i.e., no filtering */
	if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
		return -EINVAL;

	return 0;
}

static inline int amd_is_brs_event(struct perf_event *e)
{
	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
}

int amd_brs_hw_config(struct perf_event *event)
{
	int ret = 0;

	/*
	 * Due to interrupt holding, BRS is not recommended in
	 * counting mode.
	 */
	if (!is_sampling_event(event))
		return -EINVAL;

	/*
	 * Due to the way BRS operates by holding the interrupt until
	 * lbr_nr entries have been captured, it does not make sense
	 * to allow sampling on BRS with an event that does not match
	 * what BRS is capturing, i.e., retired taken branches.
	 * Otherwise the correlation with the event's period is even
	 * more loose:
	 *
	 * With retired taken branch:
	 *   Effective P = P + 16 + X
	 * With any other event:
	 *   Effective P = P + Y + X
	 *
	 * Where X is the number of taken branches due to interrupt
	 * skid. Skid is large.
	 *
	 * Where Y is the occurrences of the event while BRS is
	 * capturing the lbr_nr entries.
	 *
	 * By using retired taken branches, we limit the impact on the
	 * Y variable. We know it cannot be more than the depth of
	 * BRS.
	 */
	if (!amd_is_brs_event(event))
		return -EINVAL;

	/*
	 * BRS implementation does not work with frequency mode
	 * reprogramming of the period.
	 */
	if (event->attr.freq)
		return -EINVAL;

	/*
	 * The kernel subtracts BRS depth from period, so it must
	 * be big enough.
	 */
	if (event->attr.sample_period <= x86_pmu.lbr_nr)
		return -EINVAL;

	/*
	 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
	 */
	ret = amd_brs_setup_filter(event);

	/* only set in case of success */
	if (!ret)
		event->hw.flags |= PERF_X86_EVENT_AMD_BRS;

	return ret;
}
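
/*
 * Worked example (a sketch of the constraints enforced above, not additional
 * checks): a valid BRS event is a sampling event on retired taken branches
 * (AMD_FAM19H_BRS_EVENT) with attr.freq == 0, attr.sample_period larger than
 * x86_pmu.lbr_nr (e.g. > 16 on Fam19h), and a branch_sample_type of
 * PERF_SAMPLE_BRANCH_ANY plus optional privilege-level bits. With
 * P == 100000, the sample is effectively taken about 100000 + 16 + skid
 * taken branches after the previous one.
 */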

/* tos = top of stack, i.e., last valid entry written */
static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
{
	/*
	 * msroff: index of next entry to write so top-of-stack is one off
	 * if BRS is full then msroff is set back to 0.
	 */
	return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
}

/*
 * make sure we have a sane BRS offset to begin with
 * especially with kexec
 */
void amd_brs_reset(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return;

	/*
	 * Reset config
	 */
	set_debug_extn_cfg(0);

	/*
	 * Mark first entry as poisoned
	 */
	wrmsrl(brs_to(0), BRS_POISON);
}

int __init amd_brs_init(void)
{
	if (!amd_brs_detect())
		return -EOPNOTSUPP;

	pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);

	return 0;
}

void amd_brs_enable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Activate only on first user */
	if (++cpuc->brs_active > 1)
		return;

	cfg.val    = 0; /* reset all fields */
	cfg.brsmen = 1; /* enable branch sampling */

	/* Set enable bit */
	set_debug_extn_cfg(cfg.val);
}

void amd_brs_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		amd_brs_enable();
}

void amd_brs_disable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Check if active (could be disabled via x86_pmu_disable_all()) */
	if (!cpuc->brs_active)
		return;

	/* Only disable for last user */
	if (--cpuc->brs_active)
		return;

	/*
	 * Clear the brsmen bit but preserve the others as they contain
	 * useful state such as vb and msroff
	 */
	cfg.val = get_debug_extn_cfg();

	/*
	 * When coming in on interrupt and BRS is full, then hw will have
	 * already stopped BRS, no need to issue wrmsr again
	 */
	if (cfg.brsmen) {
		cfg.brsmen = 0;
		set_debug_extn_cfg(cfg.val);
	}
}

void amd_brs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		amd_brs_disable();
}

static bool amd_brs_match_plm(struct perf_event *event, u64 to)
{
	int type = event->attr.branch_sample_type;
	int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
	int plm_u = PERF_SAMPLE_BRANCH_USER;

	if (!(type & plm_k) && kernel_ip(to))
		return 0;

	if (!(type & plm_u) && !kernel_ip(to))
		return 0;

	return 1;
}

/*
 * Caller must ensure amd_brs_inuse() is true before calling.
 */
void amd_brs_drain(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *event = cpuc->events[0];
	struct perf_branch_entry *br = cpuc->lbr_entries;
	union amd_debug_extn_cfg cfg;
	u32 i, nr = 0, num, tos, start;
	u32 shift = 64 - boot_cpu_data.x86_virt_bits;

	/*
	 * BRS event forced on PMC0,
	 * so check if there is an event.
	 * It is possible to have lbr_users > 0 but the event
	 * not yet scheduled due to long latency PMU irq
	 */
	if (!event)
		goto empty;

	cfg.val = get_debug_extn_cfg();

	/* Sanity check [0-x86_pmu.lbr_nr] */
	if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
		goto empty;

	/* No valid branch */
	if (cfg.vb == 0)
		goto empty;

	/*
	 * msr.off points to next entry to be written
	 * tos = most recent entry index = msr.off - 1
	 * BRS register buffer saturates, so we know we have
	 * start < tos and that we have to read from start to tos
	 */
	start = 0;
	tos = amd_brs_get_tos(&cfg);

	num = tos - start + 1;

	/*
	 * BRS is only one pass (saturation) from MSROFF to depth-1
	 * MSROFF wraps to zero when buffer is full
	 */
	for (i = 0; i < num; i++) {
		u32 brs_idx = tos - i;
		u64 from, to;

		rdmsrl(brs_to(brs_idx), to);

		/* Entry does not belong to us (as marked by kernel) */
		if (to == BRS_POISON)
			break;

		/*
		 * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
		 * Necessary to generate proper virtual addresses suitable for
		 * symbolization
		 */
		to = (u64)(((s64)to << shift) >> shift);

		if (!amd_brs_match_plm(event, to))
			continue;

		rdmsrl(brs_from(brs_idx), from);

		perf_clear_branch_entry_bitfields(br + nr);

		br[nr].from = from;
		br[nr].to   = to;

		nr++;
	}
empty:
	/* Record number of sampled branches */
	cpuc->lbr_stack.nr = nr;
}
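
/*
 * Drain walk-through (illustrative values only): assuming the 16-deep Fam19h
 * BRS and cfg.msroff == 5, amd_brs_get_tos() returns 4, num == 5, and entries
 * 4..0 are read most-recent first, stopping early if a target reads back as
 * BRS_POISON. A saturated buffer reports msroff == 0, so tos == 15 and all 16
 * entries are drained. With boot_cpu_data.x86_virt_bits == 48 the shift is 16,
 * so a raw TO value of 0x0000ffffffffff00 (bit 47 set) sign-extends to
 * 0xffffffffffffff00 before privilege-level matching.
 */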

/*
 * Poison most recent entry to prevent reuse by next task
 * required because BRS entries are not tagged by PID
 */
static void amd_brs_poison_buffer(void)
{
	union amd_debug_extn_cfg cfg;
	unsigned int idx;

	/* Get current state */
	cfg.val = get_debug_extn_cfg();

	/* idx is most recently written entry */
	idx = amd_brs_get_tos(&cfg);

	/* Poison target of entry */
	wrmsrl(brs_to(idx), BRS_POISON);
}

/*
 * On context switch in, we need to make sure no samples from previous user
 * are left in the BRS.
 *
 * On ctxswin, sched_in = true, called after the PMU has started
 * On ctxswout, sched_in = false, called before the PMU is stopped
 */
void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/* no active users */
	if (!cpuc->lbr_users)
		return;

	/*
	 * On context switch in, we need to ensure we do not use entries
	 * from previous BRS user on that CPU, so we poison the buffer as
	 * a faster way compared to resetting all entries.
	 */
	if (sched_in)
		amd_brs_poison_buffer();
}

/*
 * called from ACPI processor_idle.c or acpi_pad.c
 * with interrupts disabled
 */
void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/*
	 * On mwait in, we may end up in a non-C0 state. We must disable
	 * branch sampling to avoid holding the NMI for too long. We disable
	 * it in hardware but keep the state in cpuc, so we can re-enable it.
	 *
	 * The hardware will deliver the NMI if needed when brsmen is cleared.
	 */
	if (cpuc->brs_active) {
		cfg.val = get_debug_extn_cfg();
		cfg.brsmen = !lopwr_in;
		set_debug_extn_cfg(cfg.val);
	}
}

DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);

void __init amd_brs_lopwr_init(void)
{
	static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
}
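
/*
 * Usage note (illustrative, not part of this file's logic): the idle code
 * does not call perf_amd_brs_lopwr_cb() directly. It goes through the
 * perf_lopwr_cb static call defined above, which amd_brs_lopwr_init()
 * retargets to this callback once BRS support has been detected. A caller
 * would look roughly like:
 *
 *	static_call(perf_lopwr_cb)(true);	// entering a low-power state
 *	...
 *	static_call(perf_lopwr_cb)(false);	// back to C0
 *
 * The exact caller-side wrapper lives in a header and is not shown here.
 */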