
TOMOYO Linux Cross Reference
Linux/arch/x86/events/amd/lbr.c


// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <asm/perf_event.h>

#include "../perf_event.h"

/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK         0x1ff

/*
 * LBR Branch Select filter bits which, when set, ensure that the
 * corresponding types of branches are not recorded
 */
#define LBR_SELECT_KERNEL               0       /* Branches ending in CPL = 0 */
#define LBR_SELECT_USER                 1       /* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC                  2       /* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL        3       /* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND        4       /* Near indirect calls */
#define LBR_SELECT_RET_NEAR             5       /* Near returns */
#define LBR_SELECT_JMP_NEAR_IND         6       /* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL         7       /* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH           8       /* Far branches */

#define LBR_KERNEL      BIT(LBR_SELECT_KERNEL)
#define LBR_USER        BIT(LBR_SELECT_USER)
#define LBR_JCC         BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL    BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL    BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN      BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP     BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP     BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR         BIT(LBR_SELECT_FAR_BRANCH)
#define LBR_NOT_SUPP    -1      /* unsupported filter */
#define LBR_IGNORE      0

#define LBR_ANY         \
        (LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |   \
         LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)

struct branch_entry {
        union {
                struct {
                        u64     ip:58;
                        u64     ip_sign_ext:5;
                        u64     mispredict:1;
                } split;
                u64             full;
        } from;

        union {
                struct {
                        u64     ip:58;
                        u64     ip_sign_ext:3;
                        u64     reserved:1;
                        u64     spec:1;
                        u64     valid:1;
                } split;
                u64             full;
        } to;
};

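/*
 * The From and To MSRs of each LBR entry are interleaved: From[idx]
 * lives at MSR_AMD_SAMP_BR_FROM + idx * 2 and To[idx] at the MSR
 * immediately after it.
 */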
static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
{
        wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
}

static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
{
        wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
}

static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
{
        u64 val;

        rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);

        return val;
}

static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
{
        u64 val;

        rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);

        return val;
}

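/*
 * Sign-extend the IP recorded in an LBR entry from the CPU's virtual
 * address width (boot_cpu_data.x86_virt_bits) to a canonical 64-bit
 * address.
 */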
static __always_inline u64 sign_ext_branch_ip(u64 ip)
{
        u32 shift = 64 - boot_cpu_data.x86_virt_bits;

        return (u64)(((s64)ip << shift) >> shift);
}

static void amd_pmu_lbr_filter(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int br_sel = cpuc->br_sel, offset, type, i, j;
        bool compress = false;
        bool fused_only = false;
        u64 from, to;

        /* If sampling all branches, there is nothing to filter */
        if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
            ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
                fused_only = true;

        for (i = 0; i < cpuc->lbr_stack.nr; i++) {
                from = cpuc->lbr_entries[i].from;
                to = cpuc->lbr_entries[i].to;
                type = branch_type_fused(from, to, 0, &offset);

                /*
                 * Adjust the branch from address in case of instruction
                 * fusion where it points to an instruction preceding the
                 * actual branch
                 */
                if (offset) {
                        cpuc->lbr_entries[i].from += offset;
                        if (fused_only)
                                continue;
                }

                /* If type does not correspond, then discard */
                if (type == X86_BR_NONE || (br_sel & type) != type) {
                        cpuc->lbr_entries[i].from = 0;  /* mark invalid */
                        compress = true;
                }

                if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
                        cpuc->lbr_entries[i].type = common_branch_type(type);
        }

        if (!compress)
                return;

        /* Remove all invalid entries */
        for (i = 0; i < cpuc->lbr_stack.nr; ) {
                if (!cpuc->lbr_entries[i].from) {
                        j = i;
                        while (++j < cpuc->lbr_stack.nr)
                                cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
                        cpuc->lbr_stack.nr--;
                        if (!cpuc->lbr_entries[i].from)
                                continue;
                }
                i++;
        }
}

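/*
 * Indexed by (to.valid << 1) | to.spec of an LBR To record to pick the
 * corresponding perf branch speculation outcome.
 */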
static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
        PERF_BR_SPEC_NA,
        PERF_BR_SPEC_WRONG_PATH,
        PERF_BR_NON_SPEC_CORRECT_PATH,
        PERF_BR_SPEC_CORRECT_PATH,
};

void amd_pmu_lbr_read(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct perf_branch_entry *br = cpuc->lbr_entries;
        struct branch_entry entry;
        int out = 0, idx, i;

        if (!cpuc->lbr_users)
                return;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                entry.from.full = amd_pmu_lbr_get_from(i);
                entry.to.full   = amd_pmu_lbr_get_to(i);

                /*
                 * Check if a branch has been logged; if valid = 0 and
                 * spec = 0, then no branch was recorded; if reserved = 1,
                 * then an erroneous branch was recorded (see Erratum 1452)
                 */
                if ((!entry.to.split.valid && !entry.to.split.spec) ||
                    entry.to.split.reserved)
                        continue;

                perf_clear_branch_entry_bitfields(br + out);

                br[out].from    = sign_ext_branch_ip(entry.from.split.ip);
                br[out].to      = sign_ext_branch_ip(entry.to.split.ip);
                br[out].mispred = entry.from.split.mispredict;
                br[out].predicted = !br[out].mispred;

                /*
                 * Set branch speculation information using the status of
                 * the valid and spec bits.
                 *
                 * When valid = 0, spec = 0, no branch was recorded and the
                 * entry is discarded as seen above.
                 *
                 * When valid = 0, spec = 1, the recorded branch was
                 * speculative but took the wrong path.
                 *
                 * When valid = 1, spec = 0, the recorded branch was
                 * non-speculative and took the correct path.
                 *
                 * When valid = 1, spec = 1, the recorded branch was
                 * speculative and took the correct path.
                 */
                idx = (entry.to.split.valid << 1) | entry.to.split.spec;
                br[out].spec = lbr_spec_map[idx];
                out++;
        }

        cpuc->lbr_stack.nr = out;

        /*
         * Internal register renaming ensures that LBR From[0] and
         * LBR To[0] always represent the TOS
         */
        cpuc->lbr_stack.hw_idx = 0;

        /* Perform further software filtering */
        amd_pmu_lbr_filter();
}

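/*
 * Map perf branch sample type bits to LBR_SELECT filter bits.
 * LBR_NOT_SUPP marks sample types that the hardware filter cannot
 * express; LBR_IGNORE marks types that need no hardware filtering.
 */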
static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGNORE,

        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
        [PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT]     = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_IN_TX_SHIFT]        = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_NO_TX_SHIFT]        = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,

        [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
        [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,

        [PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT]     = LBR_NOT_SUPP,
        [PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT]    = LBR_NOT_SUPP,
};

static int amd_pmu_lbr_setup_filter(struct perf_event *event)
{
        struct hw_perf_event_extra *reg = &event->hw.branch_reg;
        u64 br_type = event->attr.branch_sample_type;
        u64 mask = 0, v;
        int i;

        /* No LBR support */
        if (!x86_pmu.lbr_nr)
                return -EOPNOTSUPP;

        if (br_type & PERF_SAMPLE_BRANCH_USER)
                mask |= X86_BR_USER;

        if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
                mask |= X86_BR_KERNEL;

        /* Ignore BRANCH_HV here */

        if (br_type & PERF_SAMPLE_BRANCH_ANY)
                mask |= X86_BR_ANY;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
                mask |= X86_BR_ANY_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
                mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

        if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
                mask |= X86_BR_IND_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_COND)
                mask |= X86_BR_JCC;

        if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
                mask |= X86_BR_IND_JMP;

        if (br_type & PERF_SAMPLE_BRANCH_CALL)
                mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
                mask |= X86_BR_TYPE_SAVE;

        reg->reg = mask;
        mask = 0;

        for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
                if (!(br_type & BIT_ULL(i)))
                        continue;

                v = lbr_select_map[i];
                if (v == LBR_NOT_SUPP)
                        return -EOPNOTSUPP;

                if (v != LBR_IGNORE)
                        mask |= v;
        }

        /* Filter bits operate in suppress mode */
        reg->config = mask ^ LBR_SELECT_MASK;

        return 0;
}

int amd_pmu_lbr_hw_config(struct perf_event *event)
{
        int ret = 0;

        ret = amd_pmu_lbr_setup_filter(event);
        if (!ret)
                event->attach_state |= PERF_ATTACH_SCHED_CB;

        return ret;
}

void amd_pmu_lbr_reset(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        int i;

        if (!x86_pmu.lbr_nr)
                return;

        /* Reset all branch records individually */
        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                amd_pmu_lbr_set_from(i, 0);
                amd_pmu_lbr_set_to(i, 0);
        }

        cpuc->last_task_ctx = NULL;
        cpuc->last_log_id = 0;
        wrmsrl(MSR_AMD64_LBR_SELECT, 0);
}

void amd_pmu_lbr_add(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event_extra *reg = &event->hw.branch_reg;

        if (!x86_pmu.lbr_nr)
                return;

        if (has_branch_stack(event)) {
                cpuc->lbr_select = 1;
                cpuc->lbr_sel->config = reg->config;
                cpuc->br_sel = reg->reg;
        }

        perf_sched_cb_inc(event->pmu);

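        /*
         * Reset the LBR records only when the first LBR user is added and
         * the event has not been running yet (total_time_running == 0).
         */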
        if (!cpuc->lbr_users++ && !event->total_time_running)
                amd_pmu_lbr_reset();
}

void amd_pmu_lbr_del(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        if (has_branch_stack(event))
                cpuc->lbr_select = 0;

        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);
        perf_sched_cb_dec(event->pmu);
}

void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        /*
         * A context switch can flip the address space and LBR entries are
         * not tagged with an identifier. Hence, branches cannot be resolved
         * from the old address space and the LBR records should be wiped.
         */
        if (cpuc->lbr_users && sched_in)
                amd_pmu_lbr_reset();
}

void amd_pmu_lbr_enable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        u64 lbr_select, dbg_ctl, dbg_extn_cfg;

        if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
                return;

        /* Set hardware branch filter */
        if (cpuc->lbr_select) {
                lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
                wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
        }

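        /*
         * When supported, freeze the LBRs on PMI so that branch records
         * are not overwritten while a counter overflow is being handled.
         */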
        if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
                rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
                wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        }

        rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
        wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}

void amd_pmu_lbr_disable_all(void)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
                return;

        __amd_pmu_lbr_disable();
}

__init int amd_pmu_lbr_init(void)
{
        union cpuid_0x80000022_ebx ebx;

        if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
                return -EOPNOTSUPP;

        /* Set number of entries */
        ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
        x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;

        pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);

        return 0;
}
