~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~
vcpu_pmu.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~
  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * Copyright (c) 2023 Rivos Inc
  4  *
  5  * Authors:
  6  *     Atish Patra <atishp@rivosinc.com>
  7  */
  8 
  9 #define pr_fmt(fmt)     "riscv-kvm-pmu: " fmt
 10 #include <linux/errno.h>
 11 #include <linux/err.h>
 12 #include <linux/kvm_host.h>
 13 #include <linux/perf/riscv_pmu.h>
 14 #include <asm/csr.h>
 15 #include <asm/kvm_vcpu_sbi.h>
 16 #include <asm/kvm_vcpu_pmu.h>
 17 #include <asm/sbi.h>
 18 #include <linux/bitops.h>
 19 
 20 #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
 21 #define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
 22 #define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)
 23 
 24 static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
 25         [SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
 26         [SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
 27         [SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
 28         [SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
 29         [SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
 30         [SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
 31         [SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
 32         [SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
 33         [SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
 34         [SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
 35 };
 36 
 37 static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
 38 {
 39         u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
 40         u64 sample_period;
 41 
 42         if (!pmc->counter_val)
 43                 sample_period = counter_val_mask;
 44         else
 45                 sample_period = (-pmc->counter_val) & counter_val_mask;
 46 
 47         return sample_period;
 48 }
 49 
 50 static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
 51 {
 52         enum sbi_pmu_event_type etype = get_event_type(eidx);
 53         u32 type = PERF_TYPE_MAX;
 54 
 55         switch (etype) {
 56         case SBI_PMU_EVENT_TYPE_HW:
 57                 type = PERF_TYPE_HARDWARE;
 58                 break;
 59         case SBI_PMU_EVENT_TYPE_CACHE:
 60                 type = PERF_TYPE_HW_CACHE;
 61                 break;
 62         case SBI_PMU_EVENT_TYPE_RAW:
 63         case SBI_PMU_EVENT_TYPE_FW:
 64                 type = PERF_TYPE_RAW;
 65                 break;
 66         default:
 67                 break;
 68         }
 69 
 70         return type;
 71 }
 72 
 73 static bool kvm_pmu_is_fw_event(unsigned long eidx)
 74 {
 75         return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
 76 }
 77 
 78 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
 79 {
 80         if (pmc->perf_event) {
 81                 perf_event_disable(pmc->perf_event);
 82                 perf_event_release_kernel(pmc->perf_event);
 83                 pmc->perf_event = NULL;
 84         }
 85 }
 86 
 87 static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
 88 {
 89         return hw_event_perf_map[sbi_event_code];
 90 }
 91 
 92 static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
 93 {
 94         u64 config = U64_MAX;
 95         unsigned int cache_type, cache_op, cache_result;
 96 
 97         /* All the cache event masks lie within 0xFF. No separate masking is necessary */
 98         cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
 99                       SBI_PMU_EVENT_CACHE_ID_SHIFT;
100         cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
101                     SBI_PMU_EVENT_CACHE_OP_SHIFT;
102         cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;
103 
104         if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
105             cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
106             cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
107                 return config;
108 
109         config = cache_type | (cache_op << 8) | (cache_result << 16);
110 
111         return config;
112 }
113 
114 static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
115 {
116         enum sbi_pmu_event_type etype = get_event_type(eidx);
117         u32 ecode = get_event_code(eidx);
118         u64 config = U64_MAX;
119 
120         switch (etype) {
121         case SBI_PMU_EVENT_TYPE_HW:
122                 if (ecode < SBI_PMU_HW_GENERAL_MAX)
123                         config = kvm_pmu_get_perf_event_hw_config(ecode);
124                 break;
125         case SBI_PMU_EVENT_TYPE_CACHE:
126                 config = kvm_pmu_get_perf_event_cache_config(ecode);
127                 break;
128         case SBI_PMU_EVENT_TYPE_RAW:
129                 config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
130                 break;
131         case SBI_PMU_EVENT_TYPE_FW:
132                 if (ecode < SBI_PMU_FW_MAX)
133                         config = (1ULL << 63) | ecode;
134                 break;
135         default:
136                 break;
137         }
138 
139         return config;
140 }
141 
142 static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
143 {
144         u32 etype = kvm_pmu_get_perf_event_type(eidx);
145         u32 ecode = get_event_code(eidx);
146 
147         if (etype != SBI_PMU_EVENT_TYPE_HW)
148                 return -EINVAL;
149 
150         if (ecode == SBI_PMU_HW_CPU_CYCLES)
151                 return 0;
152         else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
153                 return 2;
154         else
155                 return -EINVAL;
156 }
157 
158 static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
159                                               unsigned long cbase, unsigned long cmask)
160 {
161         int ctr_idx = -1;
162         int i, pmc_idx;
163         int min, max;
164 
165         if (kvm_pmu_is_fw_event(eidx)) {
166                 /* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
167                 min = kvpmu->num_hw_ctrs;
168                 max = min + kvpmu->num_fw_ctrs;
169         } else {
170                 /* First 3 counters are reserved for fixed counters */
171                 min = 3;
172                 max = kvpmu->num_hw_ctrs;
173         }
174 
175         for_each_set_bit(i, &cmask, BITS_PER_LONG) {
176                 pmc_idx = i + cbase;
177                 if ((pmc_idx >= min && pmc_idx < max) &&
178                     !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
179                         ctr_idx = pmc_idx;
180                         break;
181                 }
182         }
183 
184         return ctr_idx;
185 }
186 
/*
 * Find a counter for event @eidx: a fixed counter is preferred when the
 * event has one, otherwise a free programmable counter is searched within
 * @cbase/@cmask. Returns a negative value when nothing matches.
 */
static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
			     unsigned long cbase, unsigned long cmask)
{
	/* Fixed counters need a fixed mapping as they have a different width */
	int fixed_idx = kvm_pmu_get_fixed_pmc_index(eidx);

	if (fixed_idx < 0)
		return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);

	return fixed_idx;
}
199 
200 static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
201                               unsigned long *out_val)
202 {
203         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
204         struct kvm_pmc *pmc;
205         int fevent_code;
206 
207         if (!IS_ENABLED(CONFIG_32BIT)) {
208                 pr_warn("%s: should be invoked for only RV32\n", __func__);
209                 return -EINVAL;
210         }
211 
212         if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
213                 pr_warn("Invalid counter id [%ld]during read\n", cidx);
214                 return -EINVAL;
215         }
216 
217         pmc = &kvpmu->pmc[cidx];
218 
219         if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
220                 return -EINVAL;
221 
222         fevent_code = get_event_code(pmc->event_idx);
223         pmc->counter_val = kvpmu->fw_event[fevent_code].value;
224 
225         *out_val = pmc->counter_val >> 32;
226 
227         return 0;
228 }
229 
230 static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
231                         unsigned long *out_val)
232 {
233         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
234         struct kvm_pmc *pmc;
235         u64 enabled, running;
236         int fevent_code;
237 
238         if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
239                 pr_warn("Invalid counter id [%ld] during read\n", cidx);
240                 return -EINVAL;
241         }
242 
243         pmc = &kvpmu->pmc[cidx];
244 
245         if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
246                 fevent_code = get_event_code(pmc->event_idx);
247                 pmc->counter_val = kvpmu->fw_event[fevent_code].value;
248         } else if (pmc->perf_event) {
249                 pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
250         } else {
251                 return -EINVAL;
252         }
253         *out_val = pmc->counter_val;
254 
255         return 0;
256 }
257 
258 static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
259                                          unsigned long ctr_mask)
260 {
261         /* Make sure the we have a valid counter mask requested from the caller */
262         if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
263                 return -EINVAL;
264 
265         return 0;
266 }
267 
268 static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
269                                    struct perf_sample_data *data,
270                                    struct pt_regs *regs)
271 {
272         struct kvm_pmc *pmc = perf_event->overflow_handler_context;
273         struct kvm_vcpu *vcpu = pmc->vcpu;
274         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
275         struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
276         u64 period;
277 
278         /*
279          * Stop the event counting by directly accessing the perf_event.
280          * Otherwise, this needs to deferred via a workqueue.
281          * That will introduce skew in the counter value because the actual
282          * physical counter would start after returning from this function.
283          * It will be stopped again once the workqueue is scheduled
284          */
285         rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
286 
287         /*
288          * The hw counter would start automatically when this function returns.
289          * Thus, the host may continue to interrupt and inject it to the guest
290          * even without the guest configuring the next event. Depending on the hardware
291          * the host may have some sluggishness only if privilege mode filtering is not
292          * available. In an ideal world, where qemu is not the only capable hardware,
293          * this can be removed.
294          * FYI: ARM64 does this way while x86 doesn't do anything as such.
295          * TODO: Should we keep it for RISC-V ?
296          */
297         period = -(local64_read(&perf_event->count));
298 
299         local64_set(&perf_event->hw.period_left, 0);
300         perf_event->attr.sample_period = period;
301         perf_event->hw.sample_period = period;
302 
303         set_bit(pmc->idx, kvpmu->pmc_overflown);
304         kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
305 
306         rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
307 }
308 
309 static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
310                                       unsigned long flags, unsigned long eidx,
311                                       unsigned long evtdata)
312 {
313         struct perf_event *event;
314 
315         kvm_pmu_release_perf_event(pmc);
316         attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
317         if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
318                 //TODO: Do we really want to clear the value in hardware counter
319                 pmc->counter_val = 0;
320         }
321 
322         /*
323          * Set the default sample_period for now. The guest specified value
324          * will be updated in the start call.
325          */
326         attr->sample_period = kvm_pmu_get_sample_period(pmc);
327 
328         event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
329         if (IS_ERR(event)) {
330                 pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
331                 return PTR_ERR(event);
332         }
333 
334         pmc->perf_event = event;
335         if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
336                 perf_event_enable(pmc->perf_event);
337 
338         return 0;
339 }
340 
341 int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
342 {
343         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
344         struct kvm_fw_event *fevent;
345 
346         if (!kvpmu || fid >= SBI_PMU_FW_MAX)
347                 return -EINVAL;
348 
349         fevent = &kvpmu->fw_event[fid];
350         if (fevent->started)
351                 fevent->value++;
352 
353         return 0;
354 }
355 
356 int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
357                                 unsigned long *val, unsigned long new_val,
358                                 unsigned long wr_mask)
359 {
360         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
361         int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;
362 
363         if (!kvpmu || !kvpmu->init_done) {
364                 /*
365                  * In absence of sscofpmf in the platform, the guest OS may use
366                  * the legacy PMU driver to read cycle/instret. In that case,
367                  * just return 0 to avoid any illegal trap. However, any other
368                  * hpmcounter access should result in illegal trap as they must
369                  * be access through SBI PMU only.
370                  */
371                 if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
372                         *val = 0;
373                         return ret;
374                 } else {
375                         return KVM_INSN_ILLEGAL_TRAP;
376                 }
377         }
378 
379         /* The counter CSR are read only. Thus, any write should result in illegal traps */
380         if (wr_mask)
381                 return KVM_INSN_ILLEGAL_TRAP;
382 
383         cidx = csr_num - CSR_CYCLE;
384 
385         if (pmu_ctr_read(vcpu, cidx, val) < 0)
386                 return KVM_INSN_ILLEGAL_TRAP;
387 
388         return ret;
389 }
390 
391 static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
392 {
393         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
394 
395         kfree(kvpmu->sdata);
396         kvpmu->sdata = NULL;
397         kvpmu->snapshot_addr = INVALID_GPA;
398 }
399 
400 int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
401                                       unsigned long saddr_high, unsigned long flags,
402                                       struct kvm_vcpu_sbi_return *retdata)
403 {
404         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
405         int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
406         int sbiret = 0;
407         gpa_t saddr;
408         unsigned long hva;
409         bool writable;
410 
411         if (!kvpmu || flags) {
412                 sbiret = SBI_ERR_INVALID_PARAM;
413                 goto out;
414         }
415 
416         if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
417                 kvm_pmu_clear_snapshot_area(vcpu);
418                 return 0;
419         }
420 
421         saddr = saddr_low;
422 
423         if (saddr_high != 0) {
424                 if (IS_ENABLED(CONFIG_32BIT))
425                         saddr |= ((gpa_t)saddr_high << 32);
426                 else
427                         sbiret = SBI_ERR_INVALID_ADDRESS;
428                 goto out;
429         }
430 
431         hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
432         if (kvm_is_error_hva(hva) || !writable) {
433                 sbiret = SBI_ERR_INVALID_ADDRESS;
434                 goto out;
435         }
436 
437         kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
438         if (!kvpmu->sdata)
439                 return -ENOMEM;
440 
441         if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
442                 kfree(kvpmu->sdata);
443                 sbiret = SBI_ERR_FAILURE;
444                 goto out;
445         }
446 
447         kvpmu->snapshot_addr = saddr;
448 
449 out:
450         retdata->err_val = sbiret;
451 
452         return 0;
453 }
454 
455 int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
456                                 struct kvm_vcpu_sbi_return *retdata)
457 {
458         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
459 
460         retdata->out_val = kvm_pmu_num_counters(kvpmu);
461 
462         return 0;
463 }
464 
465 int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
466                                 struct kvm_vcpu_sbi_return *retdata)
467 {
468         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
469 
470         if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
471                 retdata->err_val = SBI_ERR_INVALID_PARAM;
472                 return 0;
473         }
474 
475         retdata->out_val = kvpmu->pmc[cidx].cinfo.value;
476 
477         return 0;
478 }
479 
480 int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
481                                  unsigned long ctr_mask, unsigned long flags, u64 ival,
482                                  struct kvm_vcpu_sbi_return *retdata)
483 {
484         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
485         int i, pmc_index, sbiret = 0;
486         struct kvm_pmc *pmc;
487         int fevent_code;
488         bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;
489 
490         if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
491                 sbiret = SBI_ERR_INVALID_PARAM;
492                 goto out;
493         }
494 
495         if (snap_flag_set) {
496                 if (kvpmu->snapshot_addr == INVALID_GPA) {
497                         sbiret = SBI_ERR_NO_SHMEM;
498                         goto out;
499                 }
500                 if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
501                                         sizeof(struct riscv_pmu_snapshot_data))) {
502                         pr_warn("Unable to read snapshot shared memory while starting counters\n");
503                         sbiret = SBI_ERR_FAILURE;
504                         goto out;
505                 }
506         }
507         /* Start the counters that have been configured and requested by the guest */
508         for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
509                 pmc_index = i + ctr_base;
510                 if (!test_bit(pmc_index, kvpmu->pmc_in_use))
511                         continue;
512                 /* The guest started the counter again. Reset the overflow status */
513                 clear_bit(pmc_index, kvpmu->pmc_overflown);
514                 pmc = &kvpmu->pmc[pmc_index];
515                 if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
516                         pmc->counter_val = ival;
517                 } else if (snap_flag_set) {
518                         /* The counter index in the snapshot are relative to the counter base */
519                         pmc->counter_val = kvpmu->sdata->ctr_values[i];
520                 }
521 
522                 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
523                         fevent_code = get_event_code(pmc->event_idx);
524                         if (fevent_code >= SBI_PMU_FW_MAX) {
525                                 sbiret = SBI_ERR_INVALID_PARAM;
526                                 goto out;
527                         }
528 
529                         /* Check if the counter was already started for some reason */
530                         if (kvpmu->fw_event[fevent_code].started) {
531                                 sbiret = SBI_ERR_ALREADY_STARTED;
532                                 continue;
533                         }
534 
535                         kvpmu->fw_event[fevent_code].started = true;
536                         kvpmu->fw_event[fevent_code].value = pmc->counter_val;
537                 } else if (pmc->perf_event) {
538                         if (unlikely(pmc->started)) {
539                                 sbiret = SBI_ERR_ALREADY_STARTED;
540                                 continue;
541                         }
542                         perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
543                         perf_event_enable(pmc->perf_event);
544                         pmc->started = true;
545                 } else {
546                         sbiret = SBI_ERR_INVALID_PARAM;
547                 }
548         }
549 
550 out:
551         retdata->err_val = sbiret;
552 
553         return 0;
554 }
555 
556 int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
557                                 unsigned long ctr_mask, unsigned long flags,
558                                 struct kvm_vcpu_sbi_return *retdata)
559 {
560         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
561         int i, pmc_index, sbiret = 0;
562         u64 enabled, running;
563         struct kvm_pmc *pmc;
564         int fevent_code;
565         bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
566         bool shmem_needs_update = false;
567 
568         if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
569                 sbiret = SBI_ERR_INVALID_PARAM;
570                 goto out;
571         }
572 
573         if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
574                 sbiret = SBI_ERR_NO_SHMEM;
575                 goto out;
576         }
577 
578         /* Stop the counters that have been configured and requested by the guest */
579         for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
580                 pmc_index = i + ctr_base;
581                 if (!test_bit(pmc_index, kvpmu->pmc_in_use))
582                         continue;
583                 pmc = &kvpmu->pmc[pmc_index];
584                 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
585                         fevent_code = get_event_code(pmc->event_idx);
586                         if (fevent_code >= SBI_PMU_FW_MAX) {
587                                 sbiret = SBI_ERR_INVALID_PARAM;
588                                 goto out;
589                         }
590 
591                         if (!kvpmu->fw_event[fevent_code].started)
592                                 sbiret = SBI_ERR_ALREADY_STOPPED;
593 
594                         kvpmu->fw_event[fevent_code].started = false;
595                 } else if (pmc->perf_event) {
596                         if (pmc->started) {
597                                 /* Stop counting the counter */
598                                 perf_event_disable(pmc->perf_event);
599                                 pmc->started = false;
600                         } else {
601                                 sbiret = SBI_ERR_ALREADY_STOPPED;
602                         }
603 
604                         if (flags & SBI_PMU_STOP_FLAG_RESET)
605                                 /* Release the counter if this is a reset request */
606                                 kvm_pmu_release_perf_event(pmc);
607                 } else {
608                         sbiret = SBI_ERR_INVALID_PARAM;
609                 }
610 
611                 if (snap_flag_set && !sbiret) {
612                         if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
613                                 pmc->counter_val = kvpmu->fw_event[fevent_code].value;
614                         else if (pmc->perf_event)
615                                 pmc->counter_val += perf_event_read_value(pmc->perf_event,
616                                                                           &enabled, &running);
617                         /*
618                          * The counter and overflow indicies in the snapshot region are w.r.to
619                          * cbase. Modify the set bit in the counter mask instead of the pmc_index
620                          * which indicates the absolute counter index.
621                          */
622                         if (test_bit(pmc_index, kvpmu->pmc_overflown))
623                                 kvpmu->sdata->ctr_overflow_mask |= BIT(i);
624                         kvpmu->sdata->ctr_values[i] = pmc->counter_val;
625                         shmem_needs_update = true;
626                 }
627 
628                 if (flags & SBI_PMU_STOP_FLAG_RESET) {
629                         pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
630                         clear_bit(pmc_index, kvpmu->pmc_in_use);
631                         clear_bit(pmc_index, kvpmu->pmc_overflown);
632                         if (snap_flag_set) {
633                                 /*
634                                  * Only clear the given counter as the caller is responsible to
635                                  * validate both the overflow mask and configured counters.
636                                  */
637                                 kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
638                                 shmem_needs_update = true;
639                         }
640                 }
641         }
642 
643         if (shmem_needs_update)
644                 kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
645                                              sizeof(struct riscv_pmu_snapshot_data));
646 
647 out:
648         retdata->err_val = sbiret;
649 
650         return 0;
651 }
652 
653 int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
654                                      unsigned long ctr_mask, unsigned long flags,
655                                      unsigned long eidx, u64 evtdata,
656                                      struct kvm_vcpu_sbi_return *retdata)
657 {
658         int ctr_idx, sbiret = 0;
659         long ret;
660         bool is_fevent;
661         unsigned long event_code;
662         u32 etype = kvm_pmu_get_perf_event_type(eidx);
663         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
664         struct kvm_pmc *pmc = NULL;
665         struct perf_event_attr attr = {
666                 .type = etype,
667                 .size = sizeof(struct perf_event_attr),
668                 .pinned = true,
669                 /*
670                  * It should never reach here if the platform doesn't support the sscofpmf
671                  * extension as mode filtering won't work without it.
672                  */
673                 .exclude_host = true,
674                 .exclude_hv = true,
675                 .exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
676                 .exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
677                 .config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
678         };
679 
680         if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
681                 sbiret = SBI_ERR_INVALID_PARAM;
682                 goto out;
683         }
684 
685         event_code = get_event_code(eidx);
686         is_fevent = kvm_pmu_is_fw_event(eidx);
687         if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
688                 sbiret = SBI_ERR_NOT_SUPPORTED;
689                 goto out;
690         }
691 
692         /*
693          * SKIP_MATCH flag indicates the caller is aware of the assigned counter
694          * for this event. Just do a sanity check if it already marked used.
695          */
696         if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
697                 if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
698                         sbiret = SBI_ERR_FAILURE;
699                         goto out;
700                 }
701                 ctr_idx = ctr_base + __ffs(ctr_mask);
702         } else  {
703                 ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
704                 if (ctr_idx < 0) {
705                         sbiret = SBI_ERR_NOT_SUPPORTED;
706                         goto out;
707                 }
708         }
709 
710         pmc = &kvpmu->pmc[ctr_idx];
711         pmc->idx = ctr_idx;
712 
713         if (is_fevent) {
714                 if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
715                         kvpmu->fw_event[event_code].started = true;
716         } else {
717                 ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
718                 if (ret) {
719                         sbiret = SBI_ERR_NOT_SUPPORTED;
720                         goto out;
721                 }
722         }
723 
724         set_bit(ctr_idx, kvpmu->pmc_in_use);
725         pmc->event_idx = eidx;
726         retdata->out_val = ctr_idx;
727 out:
728         retdata->err_val = sbiret;
729 
730         return 0;
731 }
732 
733 int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
734                                       struct kvm_vcpu_sbi_return *retdata)
735 {
736         int ret;
737 
738         ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
739         if (ret == -EINVAL)
740                 retdata->err_val = SBI_ERR_INVALID_PARAM;
741 
742         return 0;
743 }
744 
745 int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
746                                 struct kvm_vcpu_sbi_return *retdata)
747 {
748         int ret;
749 
750         ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
751         if (ret == -EINVAL)
752                 retdata->err_val = SBI_ERR_INVALID_PARAM;
753 
754         return 0;
755 }
756 
757 void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
758 {
759         int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
760         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
761         struct kvm_pmc *pmc;
762 
763         /*
764          * PMU functionality should be only available to guests if privilege mode
765          * filtering is available in the host. Otherwise, guest will always count
766          * events while the execution is in hypervisor mode.
767          */
768         if (!riscv_isa_extension_available(NULL, SSCOFPMF))
769                 return;
770 
771         ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
772         if (ret < 0 || !hpm_width || !num_hw_ctrs)
773                 return;
774 
775         /*
776          * Increase the number of hardware counters to offset the time counter.
777          */
778         kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
779         kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
780         memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
781         kvpmu->snapshot_addr = INVALID_GPA;
782 
783         if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
784                 pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
785                 kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
786         }
787 
788         /*
789          * There is no correlation between the logical hardware counter and virtual counters.
790          * However, we need to encode a hpmcounter CSR in the counter info field so that
791          * KVM can trap n emulate the read. This works well in the migration use case as
792          * KVM doesn't care if the actual hpmcounter is available in the hardware or not.
793          */
794         for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
795                 /* TIME CSR shouldn't be read from perf interface */
796                 if (i == 1)
797                         continue;
798                 pmc = &kvpmu->pmc[i];
799                 pmc->idx = i;
800                 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
801                 pmc->vcpu = vcpu;
802                 if (i < kvpmu->num_hw_ctrs) {
803                         pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
804                         if (i < 3)
805                                 /* CY, IR counters */
806                                 pmc->cinfo.width = 63;
807                         else
808                                 pmc->cinfo.width = hpm_width;
809                         /*
810                          * The CSR number doesn't have any relation with the logical
811                          * hardware counters. The CSR numbers are encoded sequentially
812                          * to avoid maintaining a map between the virtual counter
813                          * and CSR number.
814                          */
815                         pmc->cinfo.csr = CSR_CYCLE + i;
816                 } else {
817                         pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
818                         pmc->cinfo.width = 63;
819                 }
820         }
821 
822         kvpmu->init_done = true;
823 }
824 
825 void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
826 {
827         struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
828         struct kvm_pmc *pmc;
829         int i;
830 
831         if (!kvpmu)
832                 return;
833 
834         for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
835                 pmc = &kvpmu->pmc[i];
836                 pmc->counter_val = 0;
837                 kvm_pmu_release_perf_event(pmc);
838                 pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
839         }
840         bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
841         bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
842         memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
843         kvm_pmu_clear_snapshot_area(vcpu);
844 }
845 
/*
 * Reset the PMU state of @vcpu.
 *
 * Resetting is implemented purely by forwarding to
 * kvm_riscv_vcpu_pmu_deinit(); nothing else is done here.
 */
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
}
850
~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.
TOMOYO Linux Cross Reference Linux/arch/riscv/kvm/vcpu_pmu.c

TOMOYO Linux Cross Reference
Linux/arch/riscv/kvm/vcpu_pmu.c