TOMOYO Linux Cross Reference
Linux/virt/kvm/eventfd.c

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * kvm eventfd support - use eventfd objects to signal various KVM events
  4  *
  5  * Copyright 2009 Novell.  All Rights Reserved.
  6  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  7  *
  8  * Author:
  9  *      Gregory Haskins <ghaskins@novell.com>
 10  */
 11 
 12 #include <linux/kvm_host.h>
 13 #include <linux/kvm.h>
 14 #include <linux/kvm_irqfd.h>
 15 #include <linux/workqueue.h>
 16 #include <linux/syscalls.h>
 17 #include <linux/wait.h>
 18 #include <linux/poll.h>
 19 #include <linux/file.h>
 20 #include <linux/list.h>
 21 #include <linux/eventfd.h>
 22 #include <linux/kernel.h>
 23 #include <linux/srcu.h>
 24 #include <linux/slab.h>
 25 #include <linux/seqlock.h>
 26 #include <linux/irqbypass.h>
 27 #include <trace/events/kvm.h>
 28 
 29 #include <kvm/iodev.h>
 30 
 31 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 32 
 33 static struct workqueue_struct *irqfd_cleanup_wq;
 34 
 35 bool __attribute__((weak))
 36 kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
 37 {
 38         return true;
 39 }
 40 
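/*
 * Inject the interrupt from workqueue context.  An ordinary irqfd pulses the
 * line (assert then de-assert) to model an edge-triggered interrupt; a
 * resampler irqfd is only asserted here and is de-asserted later by
 * irqfd_resampler_ack() when the guest acknowledges the interrupt.
 */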
 41 static void
 42 irqfd_inject(struct work_struct *work)
 43 {
 44         struct kvm_kernel_irqfd *irqfd =
 45                 container_of(work, struct kvm_kernel_irqfd, inject);
 46         struct kvm *kvm = irqfd->kvm;
 47 
 48         if (!irqfd->resampler) {
 49                 kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
 50                                 false);
 51                 kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
 52                                 false);
 53         } else
 54                 kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
 55                             irqfd->gsi, 1, false);
 56 }
 57 
 58 static void irqfd_resampler_notify(struct kvm_kernel_irqfd_resampler *resampler)
 59 {
 60         struct kvm_kernel_irqfd *irqfd;
 61 
 62         list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
 63                                  srcu_read_lock_held(&resampler->kvm->irq_srcu))
 64                 eventfd_signal(irqfd->resamplefd);
 65 }
 66 
 67 /*
 68  * Since resampler irqfds share an IRQ source ID, we de-assert once
 69  * then notify all of the resampler irqfds using this GSI.  We can't
 70  * do multiple de-asserts or we risk racing with incoming re-asserts.
 71  */
 72 static void
 73 irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 74 {
 75         struct kvm_kernel_irqfd_resampler *resampler;
 76         struct kvm *kvm;
 77         int idx;
 78 
 79         resampler = container_of(kian,
 80                         struct kvm_kernel_irqfd_resampler, notifier);
 81         kvm = resampler->kvm;
 82 
 83         kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
 84                     resampler->notifier.gsi, 0, false);
 85 
 86         idx = srcu_read_lock(&kvm->irq_srcu);
 87         irqfd_resampler_notify(resampler);
 88         srcu_read_unlock(&kvm->irq_srcu, idx);
 89 }
 90 
 91 static void
 92 irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
 93 {
 94         struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
 95         struct kvm *kvm = resampler->kvm;
 96 
 97         mutex_lock(&kvm->irqfds.resampler_lock);
 98 
 99         list_del_rcu(&irqfd->resampler_link);
100 
101         if (list_empty(&resampler->list)) {
102                 list_del_rcu(&resampler->link);
103                 kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
104                 /*
105                  * synchronize_srcu_expedited(&kvm->irq_srcu) already called
106                  * in kvm_unregister_irq_ack_notifier().
107                  */
108                 kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
109                             resampler->notifier.gsi, 0, false);
110                 kfree(resampler);
111         } else {
112                 synchronize_srcu_expedited(&kvm->irq_srcu);
113         }
114 
115         mutex_unlock(&kvm->irqfds.resampler_lock);
116 }
117 
118 /*
119  * Race-free decouple logic (ordering is critical)
120  */
121 static void
122 irqfd_shutdown(struct work_struct *work)
123 {
124         struct kvm_kernel_irqfd *irqfd =
125                 container_of(work, struct kvm_kernel_irqfd, shutdown);
126         struct kvm *kvm = irqfd->kvm;
127         u64 cnt;
128 
129         /* Make sure irqfd has been initialized in assign path. */
130         synchronize_srcu_expedited(&kvm->irq_srcu);
131 
132         /*
133          * Synchronize with the wait-queue and unhook ourselves to prevent
134          * further events.
135          */
136         eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
137 
138         /*
139          * We know no new events will be scheduled at this point, so block
140          * until all previously outstanding events have completed
141          */
142         flush_work(&irqfd->inject);
143 
144         if (irqfd->resampler) {
145                 irqfd_resampler_shutdown(irqfd);
146                 eventfd_ctx_put(irqfd->resamplefd);
147         }
148 
149         /*
150          * It is now safe to release the object's resources
151          */
152 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
153         irq_bypass_unregister_consumer(&irqfd->consumer);
154 #endif
155         eventfd_ctx_put(irqfd->eventfd);
156         kfree(irqfd);
157 }
158 
159 
160 /* assumes kvm->irqfds.lock is held */
161 static bool
162 irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
163 {
 164         return !list_empty(&irqfd->list);
165 }
166 
167 /*
168  * Mark the irqfd as inactive and schedule it for removal
169  *
170  * assumes kvm->irqfds.lock is held
171  */
172 static void
173 irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
174 {
175         BUG_ON(!irqfd_is_active(irqfd));
176 
177         list_del_init(&irqfd->list);
178 
179         queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
180 }
181 
182 int __attribute__((weak)) kvm_arch_set_irq_inatomic(
183                                 struct kvm_kernel_irq_routing_entry *irq,
184                                 struct kvm *kvm, int irq_source_id,
185                                 int level,
186                                 bool line_status)
187 {
188         return -EWOULDBLOCK;
189 }
190 
191 /*
192  * Called with wqh->lock held and interrupts disabled
193  */
194 static int
195 irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
196 {
197         struct kvm_kernel_irqfd *irqfd =
198                 container_of(wait, struct kvm_kernel_irqfd, wait);
199         __poll_t flags = key_to_poll(key);
200         struct kvm_kernel_irq_routing_entry irq;
201         struct kvm *kvm = irqfd->kvm;
202         unsigned seq;
203         int idx;
204         int ret = 0;
205 
206         if (flags & EPOLLIN) {
207                 u64 cnt;
208                 eventfd_ctx_do_read(irqfd->eventfd, &cnt);
209 
210                 idx = srcu_read_lock(&kvm->irq_srcu);
211                 do {
212                         seq = read_seqcount_begin(&irqfd->irq_entry_sc);
213                         irq = irqfd->irq_entry;
214                 } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
215                 /* An event has been signaled, inject an interrupt */
216                 if (kvm_arch_set_irq_inatomic(&irq, kvm,
217                                               KVM_USERSPACE_IRQ_SOURCE_ID, 1,
218                                               false) == -EWOULDBLOCK)
219                         schedule_work(&irqfd->inject);
220                 srcu_read_unlock(&kvm->irq_srcu, idx);
221                 ret = 1;
222         }
223 
224         if (flags & EPOLLHUP) {
225                 /* The eventfd is closing, detach from KVM */
226                 unsigned long iflags;
227 
228                 spin_lock_irqsave(&kvm->irqfds.lock, iflags);
229 
230                 /*
231                  * We must check if someone deactivated the irqfd before
232                  * we could acquire the irqfds.lock since the item is
233                  * deactivated from the KVM side before it is unhooked from
234                  * the wait-queue.  If it is already deactivated, we can
235                  * simply return knowing the other side will cleanup for us.
236                  * We cannot race against the irqfd going away since the
237                  * other side is required to acquire wqh->lock, which we hold
238                  */
239                 if (irqfd_is_active(irqfd))
240                         irqfd_deactivate(irqfd);
241 
242                 spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
243         }
244 
245         return ret;
246 }
247 
248 static void
249 irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
250                         poll_table *pt)
251 {
252         struct kvm_kernel_irqfd *irqfd =
253                 container_of(pt, struct kvm_kernel_irqfd, pt);
254         add_wait_queue_priority(wqh, &irqfd->wait);
255 }
256 
257 /* Must be called under irqfds.lock */
258 static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
259 {
260         struct kvm_kernel_irq_routing_entry *e;
261         struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
262         int n_entries;
263 
264         n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
265 
266         write_seqcount_begin(&irqfd->irq_entry_sc);
267 
268         e = entries;
269         if (n_entries == 1)
270                 irqfd->irq_entry = *e;
271         else
272                 irqfd->irq_entry.type = 0;
273 
274         write_seqcount_end(&irqfd->irq_entry_sc);
275 }
276 
277 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
278 void __attribute__((weak)) kvm_arch_irq_bypass_stop(
279                                 struct irq_bypass_consumer *cons)
280 {
281 }
282 
283 void __attribute__((weak)) kvm_arch_irq_bypass_start(
284                                 struct irq_bypass_consumer *cons)
285 {
286 }
287 
288 int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
289                                 struct kvm *kvm, unsigned int host_irq,
290                                 uint32_t guest_irq, bool set)
291 {
292         return 0;
293 }
294 
295 bool __attribute__((weak)) kvm_arch_irqfd_route_changed(
296                                 struct kvm_kernel_irq_routing_entry *old,
297                                 struct kvm_kernel_irq_routing_entry *new)
298 {
299         return true;
300 }
301 #endif
302 
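/*
 * Wire up a new irqfd: allocate the kvm_kernel_irqfd, take references on the
 * eventfd (and the optional resample eventfd), cache the GSI's routing entry,
 * hook into the eventfd's wait-queue so that signalling it injects the
 * interrupt, and register an IRQ bypass consumer where the architecture
 * supports it.
 */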
303 static int
304 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
305 {
306         struct kvm_kernel_irqfd *irqfd, *tmp;
307         struct fd f;
308         struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
309         int ret;
310         __poll_t events;
311         int idx;
312 
313         if (!kvm_arch_intc_initialized(kvm))
314                 return -EAGAIN;
315 
316         if (!kvm_arch_irqfd_allowed(kvm, args))
317                 return -EINVAL;
318 
319         irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT);
320         if (!irqfd)
321                 return -ENOMEM;
322 
323         irqfd->kvm = kvm;
324         irqfd->gsi = args->gsi;
325         INIT_LIST_HEAD(&irqfd->list);
326         INIT_WORK(&irqfd->inject, irqfd_inject);
327         INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
328         seqcount_spinlock_init(&irqfd->irq_entry_sc, &kvm->irqfds.lock);
329 
330         f = fdget(args->fd);
331         if (!f.file) {
332                 ret = -EBADF;
333                 goto out;
334         }
335 
336         eventfd = eventfd_ctx_fileget(f.file);
337         if (IS_ERR(eventfd)) {
338                 ret = PTR_ERR(eventfd);
339                 goto fail;
340         }
341 
342         irqfd->eventfd = eventfd;
343 
344         if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
345                 struct kvm_kernel_irqfd_resampler *resampler;
346 
347                 resamplefd = eventfd_ctx_fdget(args->resamplefd);
348                 if (IS_ERR(resamplefd)) {
349                         ret = PTR_ERR(resamplefd);
350                         goto fail;
351                 }
352 
353                 irqfd->resamplefd = resamplefd;
354                 INIT_LIST_HEAD(&irqfd->resampler_link);
355 
356                 mutex_lock(&kvm->irqfds.resampler_lock);
357 
358                 list_for_each_entry(resampler,
359                                     &kvm->irqfds.resampler_list, link) {
360                         if (resampler->notifier.gsi == irqfd->gsi) {
361                                 irqfd->resampler = resampler;
362                                 break;
363                         }
364                 }
365 
366                 if (!irqfd->resampler) {
367                         resampler = kzalloc(sizeof(*resampler),
368                                             GFP_KERNEL_ACCOUNT);
369                         if (!resampler) {
370                                 ret = -ENOMEM;
371                                 mutex_unlock(&kvm->irqfds.resampler_lock);
372                                 goto fail;
373                         }
374 
375                         resampler->kvm = kvm;
376                         INIT_LIST_HEAD(&resampler->list);
377                         resampler->notifier.gsi = irqfd->gsi;
378                         resampler->notifier.irq_acked = irqfd_resampler_ack;
379                         INIT_LIST_HEAD(&resampler->link);
380 
381                         list_add_rcu(&resampler->link, &kvm->irqfds.resampler_list);
382                         kvm_register_irq_ack_notifier(kvm,
383                                                       &resampler->notifier);
384                         irqfd->resampler = resampler;
385                 }
386 
387                 list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
388                 synchronize_srcu_expedited(&kvm->irq_srcu);
389 
390                 mutex_unlock(&kvm->irqfds.resampler_lock);
391         }
392 
393         /*
394          * Install our own custom wake-up handling so we are notified via
395          * a callback whenever someone signals the underlying eventfd
396          */
397         init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
398         init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
399 
400         spin_lock_irq(&kvm->irqfds.lock);
401 
402         ret = 0;
403         list_for_each_entry(tmp, &kvm->irqfds.items, list) {
404                 if (irqfd->eventfd != tmp->eventfd)
405                         continue;
406                 /* This fd is used for another irq already. */
407                 ret = -EBUSY;
408                 spin_unlock_irq(&kvm->irqfds.lock);
409                 goto fail;
410         }
411 
412         idx = srcu_read_lock(&kvm->irq_srcu);
413         irqfd_update(kvm, irqfd);
414 
415         list_add_tail(&irqfd->list, &kvm->irqfds.items);
416 
417         spin_unlock_irq(&kvm->irqfds.lock);
418 
419         /*
420          * Check if there was an event already pending on the eventfd
421          * before we registered, and trigger it as if we didn't miss it.
422          */
423         events = vfs_poll(f.file, &irqfd->pt);
424 
425         if (events & EPOLLIN)
426                 schedule_work(&irqfd->inject);
427 
428 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
429         if (kvm_arch_has_irq_bypass()) {
430                 irqfd->consumer.token = (void *)irqfd->eventfd;
431                 irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
432                 irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
433                 irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
434                 irqfd->consumer.start = kvm_arch_irq_bypass_start;
435                 ret = irq_bypass_register_consumer(&irqfd->consumer);
436                 if (ret)
437                         pr_info("irq bypass consumer (token %p) registration fails: %d\n",
438                                 irqfd->consumer.token, ret);
439         }
440 #endif
441 
442         srcu_read_unlock(&kvm->irq_srcu, idx);
443 
444         /*
445          * Do not drop the file until the irqfd is fully initialized; otherwise
446          * we might race against the EPOLLHUP.
447          */
448         fdput(f);
449         return 0;
450 
451 fail:
452         if (irqfd->resampler)
453                 irqfd_resampler_shutdown(irqfd);
454 
455         if (resamplefd && !IS_ERR(resamplefd))
456                 eventfd_ctx_put(resamplefd);
457 
458         if (eventfd && !IS_ERR(eventfd))
459                 eventfd_ctx_put(eventfd);
460 
461         fdput(f);
462 
463 out:
464         kfree(irqfd);
465         return ret;
466 }
467 
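/*
 * Return true if any irq ack notifier (for example a resampler) is registered
 * for the GSI that (irqchip, pin) maps to.
 */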
468 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
469 {
470         struct kvm_irq_ack_notifier *kian;
471         int gsi, idx;
472 
473         idx = srcu_read_lock(&kvm->irq_srcu);
474         gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
475         if (gsi != -1)
476                 hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
477                                           link, srcu_read_lock_held(&kvm->irq_srcu))
478                         if (kian->gsi == gsi) {
479                                 srcu_read_unlock(&kvm->irq_srcu, idx);
480                                 return true;
481                         }
482 
483         srcu_read_unlock(&kvm->irq_srcu, idx);
484 
485         return false;
486 }
487 EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
488 
489 void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
490 {
491         struct kvm_irq_ack_notifier *kian;
492 
493         hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
494                                   link, srcu_read_lock_held(&kvm->irq_srcu))
495                 if (kian->gsi == gsi)
496                         kian->irq_acked(kian);
497 }
498 
499 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
500 {
501         int gsi, idx;
502 
503         trace_kvm_ack_irq(irqchip, pin);
504 
505         idx = srcu_read_lock(&kvm->irq_srcu);
506         gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
507         if (gsi != -1)
508                 kvm_notify_acked_gsi(kvm, gsi);
509         srcu_read_unlock(&kvm->irq_srcu, idx);
510 }
511 
512 void kvm_register_irq_ack_notifier(struct kvm *kvm,
513                                    struct kvm_irq_ack_notifier *kian)
514 {
515         mutex_lock(&kvm->irq_lock);
516         hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
517         mutex_unlock(&kvm->irq_lock);
518         kvm_arch_post_irq_ack_notifier_list_update(kvm);
519 }
520 
521 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
522                                     struct kvm_irq_ack_notifier *kian)
523 {
524         mutex_lock(&kvm->irq_lock);
525         hlist_del_init_rcu(&kian->link);
526         mutex_unlock(&kvm->irq_lock);
527         synchronize_srcu_expedited(&kvm->irq_srcu);
528         kvm_arch_post_irq_ack_notifier_list_update(kvm);
529 }
530 
531 /*
532  * Shut down any irqfds that match fd+gsi
533  */
534 static int
535 kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
536 {
537         struct kvm_kernel_irqfd *irqfd, *tmp;
538         struct eventfd_ctx *eventfd;
539 
540         eventfd = eventfd_ctx_fdget(args->fd);
541         if (IS_ERR(eventfd))
542                 return PTR_ERR(eventfd);
543 
544         spin_lock_irq(&kvm->irqfds.lock);
545 
546         list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
547                 if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
548                         /*
549                          * This clearing of irq_entry.type is needed for when
550                          * another thread calls kvm_irq_routing_update before
551                          * we flush workqueue below (we synchronize with
552                          * kvm_irq_routing_update using irqfds.lock).
553                          */
554                         write_seqcount_begin(&irqfd->irq_entry_sc);
555                         irqfd->irq_entry.type = 0;
556                         write_seqcount_end(&irqfd->irq_entry_sc);
557                         irqfd_deactivate(irqfd);
558                 }
559         }
560 
561         spin_unlock_irq(&kvm->irqfds.lock);
562         eventfd_ctx_put(eventfd);
563 
564         /*
565          * Block until we know all outstanding shutdown jobs have completed
566          * so that we guarantee there will not be any more interrupts on this
567          * gsi once this deassign function returns.
568          */
569         flush_workqueue(irqfd_cleanup_wq);
570 
571         return 0;
572 }
573 
574 int
575 kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
576 {
577         if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
578                 return -EINVAL;
579 
580         if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
581                 return kvm_irqfd_deassign(kvm, args);
582 
583         return kvm_irqfd_assign(kvm, args);
584 }
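
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * attaching an eventfd to a guest GSI with the KVM_IRQFD ioctl.  The vm_fd
 * and gsi values are hypothetical; setting KVM_IRQFD_FLAG_RESAMPLE together
 * with irqfd.resamplefd would instead select the resampler (level-triggered)
 * mode handled above.
 *
 *	#include <unistd.h>
 *	#include <sys/eventfd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int assign_irqfd(int vm_fd, unsigned int gsi)
 *	{
 *		struct kvm_irqfd irqfd = { 0 };
 *		int efd = eventfd(0, EFD_CLOEXEC);
 *
 *		if (efd < 0)
 *			return -1;
 *		irqfd.fd  = efd;
 *		irqfd.gsi = gsi;
 *		if (ioctl(vm_fd, KVM_IRQFD, &irqfd) < 0) {
 *			close(efd);
 *			return -1;
 *		}
 *		return efd;	// a write to efd now injects gsi
 *	}
 */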
585 
586 /*
587  * This function is called as the kvm VM fd is being released.  Shut down
588  * all irqfds that still remain open.
589  */
590 void
591 kvm_irqfd_release(struct kvm *kvm)
592 {
593         struct kvm_kernel_irqfd *irqfd, *tmp;
594 
595         spin_lock_irq(&kvm->irqfds.lock);
596 
597         list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
598                 irqfd_deactivate(irqfd);
599 
600         spin_unlock_irq(&kvm->irqfds.lock);
601 
602         /*
603          * Block until we know all outstanding shutdown jobs have completed
604          * since we do not take a kvm* reference.
605          */
606         flush_workqueue(irqfd_cleanup_wq);
607 
608 }
609 
610 /*
611  * Take note of a change in irq routing.
612  * Caller must invoke synchronize_srcu_expedited(&kvm->irq_srcu) afterwards.
613  */
614 void kvm_irq_routing_update(struct kvm *kvm)
615 {
616         struct kvm_kernel_irqfd *irqfd;
617 
618         spin_lock_irq(&kvm->irqfds.lock);
619 
620         list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
621 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
622                 /* Under irqfds.lock, so can read irq_entry safely */
623                 struct kvm_kernel_irq_routing_entry old = irqfd->irq_entry;
624 #endif
625 
626                 irqfd_update(kvm, irqfd);
627 
628 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
629                 if (irqfd->producer &&
630                     kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) {
631                         int ret = kvm_arch_update_irqfd_routing(
632                                         irqfd->kvm, irqfd->producer->irq,
633                                         irqfd->gsi, 1);
634                         WARN_ON(ret);
635                 }
636 #endif
637         }
638 
639         spin_unlock_irq(&kvm->irqfds.lock);
640 }
641 
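/*
 * Signal the resample eventfds attached to the GSI that (irqchip, pin) maps
 * to, if a resampler is registered for it.  Returns true when such a
 * resampler exists, false otherwise.
 */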
642 bool kvm_notify_irqfd_resampler(struct kvm *kvm,
643                                 unsigned int irqchip,
644                                 unsigned int pin)
645 {
646         struct kvm_kernel_irqfd_resampler *resampler;
647         int gsi, idx;
648 
649         idx = srcu_read_lock(&kvm->irq_srcu);
650         gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
651         if (gsi != -1) {
652                 list_for_each_entry_srcu(resampler,
653                                          &kvm->irqfds.resampler_list, link,
654                                          srcu_read_lock_held(&kvm->irq_srcu)) {
655                         if (resampler->notifier.gsi == gsi) {
656                                 irqfd_resampler_notify(resampler);
657                                 srcu_read_unlock(&kvm->irq_srcu, idx);
658                                 return true;
659                         }
660                 }
661         }
662         srcu_read_unlock(&kvm->irq_srcu, idx);
663 
664         return false;
665 }
666 
667 /*
668  * create a host-wide workqueue for issuing deferred shutdown requests
669  * aggregated from all vm* instances. We need our own isolated
670  * queue to ease flushing work items when a VM exits.
671  */
672 int kvm_irqfd_init(void)
673 {
674         irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
675         if (!irqfd_cleanup_wq)
676                 return -ENOMEM;
677 
678         return 0;
679 }
680 
681 void kvm_irqfd_exit(void)
682 {
683         destroy_workqueue(irqfd_cleanup_wq);
684 }
685 #endif
686 
687 /*
688  * --------------------------------------------------------------------
689  * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
690  *
691  * userspace can register a PIO/MMIO address with an eventfd for receiving
692  * notification when the memory has been touched.
693  * --------------------------------------------------------------------
694  */
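
/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * registering a 4-byte MMIO doorbell with the KVM_IOEVENTFD ioctl.  The
 * vm_fd and doorbell address are hypothetical; with flags left at zero the
 * registration is a wildcard, so any value the guest writes to the address
 * signals the eventfd (KVM_IOEVENTFD_FLAG_DATAMATCH plus datamatch would
 * restrict it to one value).
 *
 *	#include <unistd.h>
 *	#include <sys/eventfd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int assign_ioeventfd(int vm_fd, __u64 doorbell_gpa)
 *	{
 *		struct kvm_ioeventfd ioeventfd = { 0 };
 *		int efd = eventfd(0, EFD_CLOEXEC);
 *
 *		if (efd < 0)
 *			return -1;
 *		ioeventfd.addr = doorbell_gpa;
 *		ioeventfd.len  = 4;
 *		ioeventfd.fd   = efd;
 *		if (ioctl(vm_fd, KVM_IOEVENTFD, &ioeventfd) < 0) {
 *			close(efd);
 *			return -1;
 *		}
 *		return efd;	// guest writes to the doorbell now signal efd
 *	}
 */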
695 
696 struct _ioeventfd {
697         struct list_head     list;
698         u64                  addr;
699         int                  length;
700         struct eventfd_ctx  *eventfd;
701         u64                  datamatch;
702         struct kvm_io_device dev;
703         u8                   bus_idx;
704         bool                 wildcard;
705 };
706 
707 static inline struct _ioeventfd *
708 to_ioeventfd(struct kvm_io_device *dev)
709 {
710         return container_of(dev, struct _ioeventfd, dev);
711 }
712 
713 static void
714 ioeventfd_release(struct _ioeventfd *p)
715 {
716         eventfd_ctx_put(p->eventfd);
717         list_del(&p->list);
718         kfree(p);
719 }
720 
721 static bool
722 ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
723 {
724         u64 _val;
725 
726         if (addr != p->addr)
727                 /* address must be precise for a hit */
728                 return false;
729 
730         if (!p->length)
731                 /* length = 0 means only look at the address, so always a hit */
732                 return true;
733 
734         if (len != p->length)
735                 /* address-range must be precise for a hit */
736                 return false;
737 
738         if (p->wildcard)
739                 /* all else equal, wildcard is always a hit */
740                 return true;
741 
742         /* otherwise, we have to actually compare the data */
743 
744         BUG_ON(!IS_ALIGNED((unsigned long)val, len));
745 
746         switch (len) {
747         case 1:
748                 _val = *(u8 *)val;
749                 break;
750         case 2:
751                 _val = *(u16 *)val;
752                 break;
753         case 4:
754                 _val = *(u32 *)val;
755                 break;
756         case 8:
757                 _val = *(u64 *)val;
758                 break;
759         default:
760                 return false;
761         }
762 
763         return _val == p->datamatch;
764 }
765 
766 /* MMIO/PIO writes trigger an event if the addr/val match */
767 static int
768 ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
769                 int len, const void *val)
770 {
771         struct _ioeventfd *p = to_ioeventfd(this);
772 
773         if (!ioeventfd_in_range(p, addr, len, val))
774                 return -EOPNOTSUPP;
775 
776         eventfd_signal(p->eventfd);
777         return 0;
778 }
779 
780 /*
781  * This function is called as KVM is completely shutting down.  We do not
782  * need to worry about locking; just nuke anything we have as quickly as possible.
783  */
784 static void
785 ioeventfd_destructor(struct kvm_io_device *this)
786 {
787         struct _ioeventfd *p = to_ioeventfd(this);
788 
789         ioeventfd_release(p);
790 }
791 
792 static const struct kvm_io_device_ops ioeventfd_ops = {
793         .write      = ioeventfd_write,
794         .destructor = ioeventfd_destructor,
795 };
796 
797 /* assumes kvm->slots_lock held */
798 static bool
799 ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
800 {
801         struct _ioeventfd *_p;
802 
803         list_for_each_entry(_p, &kvm->ioeventfds, list)
804                 if (_p->bus_idx == p->bus_idx &&
805                     _p->addr == p->addr &&
806                     (!_p->length || !p->length ||
807                      (_p->length == p->length &&
808                       (_p->wildcard || p->wildcard ||
809                        _p->datamatch == p->datamatch))))
810                         return true;
811 
812         return false;
813 }
814 
815 static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
816 {
817         if (flags & KVM_IOEVENTFD_FLAG_PIO)
818                 return KVM_PIO_BUS;
819         if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
820                 return KVM_VIRTIO_CCW_NOTIFY_BUS;
821         return KVM_MMIO_BUS;
822 }
823 
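/*
 * Register an ioeventfd on the given bus: take a reference on the eventfd,
 * reject a registration that collides with an existing one, and add a
 * kvm_io_device whose write handler signals the eventfd.
 */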
824 static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
825                                 enum kvm_bus bus_idx,
826                                 struct kvm_ioeventfd *args)
827 {
828 
829         struct eventfd_ctx *eventfd;
830         struct _ioeventfd *p;
831         int ret;
832 
833         eventfd = eventfd_ctx_fdget(args->fd);
834         if (IS_ERR(eventfd))
835                 return PTR_ERR(eventfd);
836 
837         p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT);
838         if (!p) {
839                 ret = -ENOMEM;
840                 goto fail;
841         }
842 
843         INIT_LIST_HEAD(&p->list);
844         p->addr    = args->addr;
845         p->bus_idx = bus_idx;
846         p->length  = args->len;
847         p->eventfd = eventfd;
848 
849         /* The datamatch feature is optional, otherwise this is a wildcard */
850         if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
851                 p->datamatch = args->datamatch;
852         else
853                 p->wildcard = true;
854 
855         mutex_lock(&kvm->slots_lock);
856 
857         /* Verify that there isn't a match already */
858         if (ioeventfd_check_collision(kvm, p)) {
859                 ret = -EEXIST;
860                 goto unlock_fail;
861         }
862 
863         kvm_iodevice_init(&p->dev, &ioeventfd_ops);
864 
865         ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
866                                       &p->dev);
867         if (ret < 0)
868                 goto unlock_fail;
869 
870         kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
871         list_add_tail(&p->list, &kvm->ioeventfds);
872 
873         mutex_unlock(&kvm->slots_lock);
874 
875         return 0;
876 
877 unlock_fail:
878         mutex_unlock(&kvm->slots_lock);
879         kfree(p);
880 
881 fail:
882         eventfd_ctx_put(eventfd);
883 
884         return ret;
885 }
886 
887 static int
888 kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
889                            struct kvm_ioeventfd *args)
890 {
891         struct _ioeventfd        *p;
892         struct eventfd_ctx       *eventfd;
893         struct kvm_io_bus        *bus;
894         int                       ret = -ENOENT;
895         bool                      wildcard;
896 
897         eventfd = eventfd_ctx_fdget(args->fd);
898         if (IS_ERR(eventfd))
899                 return PTR_ERR(eventfd);
900 
901         wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
902 
903         mutex_lock(&kvm->slots_lock);
904 
905         list_for_each_entry(p, &kvm->ioeventfds, list) {
906                 if (p->bus_idx != bus_idx ||
907                     p->eventfd != eventfd  ||
908                     p->addr != args->addr  ||
909                     p->length != args->len ||
910                     p->wildcard != wildcard)
911                         continue;
912 
913                 if (!p->wildcard && p->datamatch != args->datamatch)
914                         continue;
915 
916                 kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
917                 bus = kvm_get_bus(kvm, bus_idx);
918                 if (bus)
919                         bus->ioeventfd_count--;
920                 ret = 0;
921                 break;
922         }
923 
924         mutex_unlock(&kvm->slots_lock);
925 
926         eventfd_ctx_put(eventfd);
927 
928         return ret;
929 }
930 
931 static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
932 {
933         enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
934         int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
935 
936         if (!args->len && bus_idx == KVM_MMIO_BUS)
937                 kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
938 
939         return ret;
940 }
941 
942 static int
943 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
944 {
945         enum kvm_bus              bus_idx;
946         int ret;
947 
948         bus_idx = ioeventfd_bus_from_flags(args->flags);
949         /* must be natural-word sized, or 0 to ignore length */
950         switch (args->len) {
951         case 0:
952         case 1:
953         case 2:
954         case 4:
955         case 8:
956                 break;
957         default:
958                 return -EINVAL;
959         }
960 
961         /* check for range overflow */
962         if (args->addr + args->len < args->addr)
963                 return -EINVAL;
964 
965         /* check for extra flags that we don't understand */
966         if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
967                 return -EINVAL;
968 
969         /* ioeventfd with no length can't be combined with DATAMATCH */
970         if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
971                 return -EINVAL;
972 
973         ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
974         if (ret)
975                 goto fail;
976 
977         /* When length is ignored, MMIO is also put on a separate bus, for
978          * faster lookups.
979          */
980         if (!args->len && bus_idx == KVM_MMIO_BUS) {
981                 ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
982                 if (ret < 0)
983                         goto fast_fail;
984         }
985 
986         return 0;
987 
988 fast_fail:
989         kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
990 fail:
991         return ret;
992 }
993 
994 int
995 kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
996 {
997         if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
998                 return kvm_deassign_ioeventfd(kvm, args);
999 
1000         return kvm_assign_ioeventfd(kvm, args);
1001 }
1002 
1003 void
1004 kvm_eventfd_init(struct kvm *kvm)
1005 {
1006 #ifdef CONFIG_HAVE_KVM_IRQCHIP
1007         spin_lock_init(&kvm->irqfds.lock);
1008         INIT_LIST_HEAD(&kvm->irqfds.items);
1009         INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
1010         mutex_init(&kvm->irqfds.resampler_lock);
1011 #endif
1012         INIT_LIST_HEAD(&kvm->ioeventfds);
1013 }
1014 
