~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * vgic_lpi_stress - Stress test for KVM's ITS emulation
  4  *
  5  * Copyright (c) 2024 Google LLC
  6  */
  7 
#include <linux/sizes.h>
#include <pthread.h>
#include <stdatomic.h>
#include <string.h>
#include <sys/sysinfo.h>

#include "kvm_util.h"
#include "gic.h"
#include "gic_v3.h"
#include "gic_v3_its.h"
#include "processor.h"
#include "ucall.h"
#include "vgic.h"
 20 
/* Memslot (besides slot 0) holding all ITS/redistributor tables. */
#define TEST_MEMSLOT_INDEX	1

/* First valid LPI INTID on GICv3; the irq handler asserts intids >= this. */
#define GIC_LPI_OFFSET	8192

/* Times each LPI worker injects its device's full set of event IDs. */
static size_t nr_iterations = 1000;
/* Base GPA of the test memslot; tables in test_data are carved out of it. */
static vm_paddr_t gpa_base;

static struct kvm_vm *vm;
static struct kvm_vcpu **vcpus;
/* Device fds for the in-kernel vGICv3 and the emulated ITS. */
static int gic_fd, its_fd;
 31 
/*
 * Test parameters and guest-physical addresses of the ITS/redistributor
 * tables. Shared with the guest via sync_global_to_guest() once the host
 * side has finished allocating and populating everything.
 */
static struct test_data {
	bool		request_vcpus_stop;	/* host tells vCPUs to leave their spin loop */
	u32		nr_cpus;
	u32		nr_devices;
	u32		nr_event_ids;		/* events per device */

	vm_paddr_t	device_table;		/* single-level, 64K */
	vm_paddr_t	collection_table;	/* single-level, 64K */
	vm_paddr_t	cmdq_base;
	void		*cmdq_base_va;		/* identity-mapped in the guest */
	vm_paddr_t	itt_tables;		/* one 64K ITT per device, contiguous */

	vm_paddr_t	lpi_prop_table;		/* shared by all redistributors */
	vm_paddr_t	lpi_pend_tables;	/* one 64K table per vCPU, contiguous */
} test_data =  {
	.nr_cpus	= 1,
	.nr_devices	= 1,
	.nr_event_ids	= 16,
};
 51 
 52 static void guest_irq_handler(struct ex_regs *regs)
 53 {
 54         u32 intid = gic_get_and_ack_irq();
 55 
 56         if (intid == IAR_SPURIOUS)
 57                 return;
 58 
 59         GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
 60         gic_set_eoi(intid);
 61 }
 62 
 63 static void guest_setup_its_mappings(void)
 64 {
 65         u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
 66         u32 nr_events = test_data.nr_event_ids;
 67         u32 nr_devices = test_data.nr_devices;
 68         u32 nr_cpus = test_data.nr_cpus;
 69 
 70         for (coll_id = 0; coll_id < nr_cpus; coll_id++)
 71                 its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);
 72 
 73         /* Round-robin the LPIs to all of the vCPUs in the VM */
 74         coll_id = 0;
 75         for (device_id = 0; device_id < nr_devices; device_id++) {
 76                 vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
 77 
 78                 its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
 79                                   itt_base, SZ_64K, true);
 80 
 81                 for (event_id = 0; event_id < nr_events; event_id++) {
 82                         its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
 83                                            event_id, coll_id, intid++);
 84 
 85                         coll_id = (coll_id + 1) % test_data.nr_cpus;
 86                 }
 87         }
 88 }
 89 
 90 static void guest_invalidate_all_rdists(void)
 91 {
 92         int i;
 93 
 94         for (i = 0; i < test_data.nr_cpus; i++)
 95                 its_send_invall_cmd(test_data.cmdq_base_va, i);
 96 }
 97 
 98 static void guest_setup_gic(void)
 99 {
100         static atomic_int nr_cpus_ready = 0;
101         u32 cpuid = guest_get_vcpuid();
102 
103         gic_init(GIC_V3, test_data.nr_cpus);
104         gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
105                               test_data.lpi_pend_tables + (cpuid * SZ_64K));
106 
107         atomic_fetch_add(&nr_cpus_ready, 1);
108 
109         if (cpuid > 0)
110                 return;
111 
112         while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
113                 cpu_relax();
114 
115         its_init(test_data.collection_table, SZ_64K,
116                  test_data.device_table, SZ_64K,
117                  test_data.cmdq_base, SZ_64K);
118 
119         guest_setup_its_mappings();
120         guest_invalidate_all_rdists();
121 }
122 
123 static void guest_code(size_t nr_lpis)
124 {
125         guest_setup_gic();
126 
127         GUEST_SYNC(0);
128 
129         /*
130          * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
131          * never getting the stop signal.
132          */
133         while (!READ_ONCE(test_data.request_vcpus_stop))
134                 cpu_relax();
135 
136         GUEST_DONE();
137 }
138 
139 static void setup_memslot(void)
140 {
141         size_t pages;
142         size_t sz;
143 
144         /*
145          * For the ITS:
146          *  - A single level device table
147          *  - A single level collection table
148          *  - The command queue
149          *  - An ITT for each device
150          */
151         sz = (3 + test_data.nr_devices) * SZ_64K;
152 
153         /*
154          * For the redistributors:
155          *  - A shared LPI configuration table
156          *  - An LPI pending table for each vCPU
157          */
158         sz += (1 + test_data.nr_cpus) * SZ_64K;
159 
160         pages = sz / vm->page_size;
161         gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
162         vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
163                                     TEST_MEMSLOT_INDEX, pages, 0);
164 }
165 
166 #define LPI_PROP_DEFAULT_PRIO   0xa0
167 
168 static void configure_lpis(void)
169 {
170         size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
171         u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
172         size_t i;
173 
174         for (i = 0; i < nr_lpis; i++) {
175                 tbl[i] = LPI_PROP_DEFAULT_PRIO |
176                          LPI_PROP_GROUP1 |
177                          LPI_PROP_ENABLED;
178         }
179 }
180 
/*
 * Allocate the ITS/redistributor tables from the test memslot (whose size
 * was computed to fit exactly these allocations in setup_memslot()) and
 * publish their addresses to the guest.
 */
static void setup_test_data(void)
{
	size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
	u32 nr_devices = test_data.nr_devices;
	u32 nr_cpus = test_data.nr_cpus;
	vm_paddr_t cmdq_base;

	test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
						    gpa_base,
						    TEST_MEMSLOT_INDEX);

	test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
							gpa_base,
							TEST_MEMSLOT_INDEX);

	cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
				       TEST_MEMSLOT_INDEX);
	/* Identity-map the command queue: guest VA == GPA for cmdq_base_va. */
	virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
	test_data.cmdq_base = cmdq_base;
	test_data.cmdq_base_va = (void *)cmdq_base;

	/* One contiguous 64K ITT per device. */
	test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
						  gpa_base, TEST_MEMSLOT_INDEX);

	/* Shared LPI config table; populated from the host side below. */
	test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
						      gpa_base, TEST_MEMSLOT_INDEX);
	configure_lpis();

	/* One contiguous 64K LPI pending table per vCPU. */
	test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
						       gpa_base, TEST_MEMSLOT_INDEX);

	sync_global_to_guest(vm, test_data);
}
214 
215 static void setup_gic(void)
216 {
217         gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
218         __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
219 
220         its_fd = vgic_its_setup(vm);
221 }
222 
223 static void signal_lpi(u32 device_id, u32 event_id)
224 {
225         vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;
226 
227         struct kvm_msi msi = {
228                 .address_lo     = db_addr,
229                 .address_hi     = db_addr >> 32,
230                 .data           = event_id,
231                 .devid          = device_id,
232                 .flags          = KVM_MSI_VALID_DEVID,
233         };
234 
235         /*
236          * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
237          * which for arm64 implies having a valid translation in the ITS.
238          */
239         TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
240                     "KVM_SIGNAL_MSI ioctl failed");
241 }
242 
243 static pthread_barrier_t test_setup_barrier;
244 
245 static void *lpi_worker_thread(void *data)
246 {
247         u32 device_id = (size_t)data;
248         u32 event_id;
249         size_t i;
250 
251         pthread_barrier_wait(&test_setup_barrier);
252 
253         for (i = 0; i < nr_iterations; i++)
254                 for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
255                         signal_lpi(device_id, event_id);
256 
257         return NULL;
258 }
259 
260 static void *vcpu_worker_thread(void *data)
261 {
262         struct kvm_vcpu *vcpu = data;
263         struct ucall uc;
264 
265         while (true) {
266                 vcpu_run(vcpu);
267 
268                 switch (get_ucall(vcpu, &uc)) {
269                 case UCALL_SYNC:
270                         pthread_barrier_wait(&test_setup_barrier);
271                         continue;
272                 case UCALL_DONE:
273                         return NULL;
274                 case UCALL_ABORT:
275                         REPORT_GUEST_ASSERT(uc);
276                         break;
277                 default:
278                         TEST_FAIL("Unknown ucall: %lu", uc.cmd);
279                 }
280         }
281 
282         return NULL;
283 }
284 
285 static void report_stats(struct timespec delta)
286 {
287         double nr_lpis;
288         double time;
289 
290         nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;
291 
292         time = delta.tv_sec;
293         time += ((double)delta.tv_nsec) / NSEC_PER_SEC;
294 
295         pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
296 }
297 
298 static void run_test(void)
299 {
300         u32 nr_devices = test_data.nr_devices;
301         u32 nr_vcpus = test_data.nr_cpus;
302         pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
303         pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
304         struct timespec start, delta;
305         size_t i;
306 
307         TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");
308 
309         pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);
310 
311         for (i = 0; i < nr_vcpus; i++)
312                 pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);
313 
314         for (i = 0; i < nr_devices; i++)
315                 pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);
316 
317         pthread_barrier_wait(&test_setup_barrier);
318 
319         clock_gettime(CLOCK_MONOTONIC, &start);
320 
321         for (i = 0; i < nr_devices; i++)
322                 pthread_join(lpi_threads[i], NULL);
323 
324         delta = timespec_elapsed(start);
325         write_guest_global(vm, test_data.request_vcpus_stop, true);
326 
327         for (i = 0; i < nr_vcpus; i++)
328                 pthread_join(vcpu_threads[i], NULL);
329 
330         report_stats(delta);
331 }
332 
333 static void setup_vm(void)
334 {
335         int i;
336 
337         vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
338         TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
339 
340         vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
341 
342         vm_init_descriptor_tables(vm);
343         for (i = 0; i < test_data.nr_cpus; i++)
344                 vcpu_init_descriptor_tables(vcpus[i]);
345 
346         vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
347 
348         setup_memslot();
349 
350         setup_gic();
351 
352         setup_test_data();
353 }
354 
/* Tear down in reverse order of creation: device fds, VM, vCPU array. */
static void destroy_vm(void)
{
	close(its_fd);
	close(gic_fd);
	kvm_vm_free(vm);
	free(vcpus);
}
362 
363 static void pr_usage(const char *name)
364 {
365         pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name);
366         pr_info("  -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
367         pr_info("  -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
368         pr_info("  -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
369         pr_info("  -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
370 }
371 
372 int main(int argc, char **argv)
373 {
374         u32 nr_threads;
375         int c;
376 
377         while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
378                 switch (c) {
379                 case 'v':
380                         test_data.nr_cpus = atoi(optarg);
381                         break;
382                 case 'd':
383                         test_data.nr_devices = atoi(optarg);
384                         break;
385                 case 'e':
386                         test_data.nr_event_ids = atoi(optarg);
387                         break;
388                 case 'i':
389                         nr_iterations = strtoul(optarg, NULL, 0);
390                         break;
391                 case 'h':
392                 default:
393                         pr_usage(argv[0]);
394                         return 1;
395                 }
396         }
397 
398         nr_threads = test_data.nr_cpus + test_data.nr_devices;
399         if (nr_threads > get_nprocs())
400                 pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",
401                          nr_threads, get_nprocs());
402 
403         setup_vm();
404 
405         run_test();
406 
407         destroy_vm();
408 
409         return 0;
410 }
411 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php