
TOMOYO Linux Cross Reference
Linux/arch/arm64/kvm/vgic/vgic.c

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Copyright (C) 2015, 2016 ARM Ltd.
  4  */
  5 
  6 #include <linux/interrupt.h>
  7 #include <linux/irq.h>
  8 #include <linux/kvm.h>
  9 #include <linux/kvm_host.h>
 10 #include <linux/list_sort.h>
 11 #include <linux/nospec.h>
 12 
 13 #include <asm/kvm_hyp.h>
 14 
 15 #include "vgic.h"
 16 
 17 #define CREATE_TRACE_POINTS
 18 #include "trace.h"
 19 
 20 struct vgic_global kvm_vgic_global_state __ro_after_init = {
 21         .gicv3_cpuif = STATIC_KEY_FALSE_INIT,
 22 };
 23 
 24 /*
 25  * Locking order is always:
 26  * kvm->lock (mutex)
 27  *   vcpu->mutex (mutex)
 28  *     kvm->arch.config_lock (mutex)
 29  *       its->cmd_lock (mutex)
 30  *         its->its_lock (mutex)
 31  *           vgic_cpu->ap_list_lock             must be taken with IRQs disabled
 32  *             vgic_dist->lpi_xa.xa_lock        must be taken with IRQs disabled
 33  *               vgic_irq->irq_lock             must be taken with IRQs disabled
 34  *
 35  * As the ap_list_lock might be taken from the timer interrupt handler,
 36  * we have to disable IRQs before taking this lock and everything lower
 37  * than it.
 38  *
 39  * The config_lock has additional ordering requirements:
 40  * kvm->slots_lock
 41  *   kvm->srcu
 42  *     kvm->arch.config_lock
 43  *
 44  * If you need to take multiple locks, always take the upper lock first,
 45  * then the lower ones, e.g. first take the its_lock, then the irq_lock.
 46  * If you are already holding a lock and need to take a higher one, you
 47  * have to drop the lower ranking lock first and re-acquire it after having
 48  * taken the upper one.
 49  *
 50  * When taking more than one ap_list_lock at the same time, always take the
 51  * lowest numbered VCPU's ap_list_lock first, so:
 52  *   vcpuX->vcpu_id < vcpuY->vcpu_id:
 53  *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
 54  *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
 55  *
 56  * Since the VGIC must support injecting virtual interrupts from ISRs, we have
 57  * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore variants for any
 58  * outer lock that may be taken while injecting an interrupt.
 59  */
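/*
 * Illustrative sketch of the ordering rules above (the helper below is
 * hypothetical and unused; the real instance of this pattern lives in
 * vgic_prune_ap_list() further down): taking two VCPUs' ap_list_locks in
 * vcpu_id order with IRQs disabled.
 */
static __maybe_unused void vgic_example_lock_ap_list_pair(struct kvm_vcpu *x,
                                                          struct kvm_vcpu *y)
{
        struct kvm_vcpu *first = x->vcpu_id < y->vcpu_id ? x : y;
        struct kvm_vcpu *second = x->vcpu_id < y->vcpu_id ? y : x;
        unsigned long flags;

        /* Lowest vcpu_id first, with IRQs disabled for the whole section. */
        raw_spin_lock_irqsave(&first->arch.vgic_cpu.ap_list_lock, flags);
        raw_spin_lock_nested(&second->arch.vgic_cpu.ap_list_lock,
                             SINGLE_DEPTH_NESTING);

        /* ... operate on both ap_lists ... */

        /* Unlock in reverse order, restoring the IRQ state last. */
        raw_spin_unlock(&second->arch.vgic_cpu.ap_list_lock);
        raw_spin_unlock_irqrestore(&first->arch.vgic_cpu.ap_list_lock, flags);
}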
 60 
 61 /*
 62  * Index the VM's xarray of mapped LPIs and return a reference to the IRQ
 63  * structure. The caller is expected to call vgic_put_irq() later once it's
 64  * finished with the IRQ.
 65  */
 66 static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
 67 {
 68         struct vgic_dist *dist = &kvm->arch.vgic;
 69         struct vgic_irq *irq = NULL;
 70 
 71         rcu_read_lock();
 72 
 73         irq = xa_load(&dist->lpi_xa, intid);
 74         if (!vgic_try_get_irq_kref(irq))
 75                 irq = NULL;
 76 
 77         rcu_read_unlock();
 78 
 79         return irq;
 80 }
 81 
 82 /*
 83  * This looks up the virtual interrupt ID to get the corresponding
 84  * struct vgic_irq. It also increases the refcount, so any caller is expected
 85  * to call vgic_put_irq() once it's finished with this IRQ.
 86  */
 87 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
 88                               u32 intid)
 89 {
 90         /* SGIs and PPIs */
 91         if (intid <= VGIC_MAX_PRIVATE) {
 92                 intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1);
 93                 return &vcpu->arch.vgic_cpu.private_irqs[intid];
 94         }
 95 
 96         /* SPIs */
 97         if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
 98                 intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
 99                 return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
100         }
101 
102         /* LPIs */
103         if (intid >= VGIC_MIN_LPI)
104                 return vgic_get_lpi(kvm, intid);
105 
106         return NULL;
107 }
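/*
 * Hedged usage sketch of the lookup contract above (hypothetical helper,
 * not taken from the kernel): every successful vgic_get_irq() must be
 * paired with a vgic_put_irq() once the caller is done with the interrupt;
 * kvm_vgic_get_map() below follows the same shape.
 */
static __maybe_unused bool vgic_example_irq_is_enabled(struct kvm *kvm,
                                                       struct kvm_vcpu *vcpu,
                                                       u32 intid)
{
        struct vgic_irq *irq = vgic_get_irq(kvm, vcpu, intid);
        unsigned long flags;
        bool enabled;

        if (!irq)
                return false;

        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        enabled = irq->enabled;
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

        /* Drop the reference taken by vgic_get_irq(). */
        vgic_put_irq(kvm, irq);

        return enabled;
}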
108 
109 /*
110  * We can't do anything in here, because we lack the kvm pointer to
111  * lock and remove the item from the lpi_xa. So we keep this function
112  * empty and use the return value of kref_put() to trigger the freeing.
113  */
114 static void vgic_irq_release(struct kref *ref)
115 {
116 }
117 
118 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
119 {
120         struct vgic_dist *dist = &kvm->arch.vgic;
121         unsigned long flags;
122 
123         if (irq->intid < VGIC_MIN_LPI)
124                 return;
125 
126         if (!kref_put(&irq->refcount, vgic_irq_release))
127                 return;
128 
129         xa_lock_irqsave(&dist->lpi_xa, flags);
130         __xa_erase(&dist->lpi_xa, irq->intid);
131         xa_unlock_irqrestore(&dist->lpi_xa, flags);
132 
133         kfree_rcu(irq, rcu);
134 }
135 
136 void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
137 {
138         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
139         struct vgic_irq *irq, *tmp;
140         unsigned long flags;
141 
142         raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
143 
144         list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
145                 if (irq->intid >= VGIC_MIN_LPI) {
146                         raw_spin_lock(&irq->irq_lock);
147                         list_del(&irq->ap_list);
148                         irq->vcpu = NULL;
149                         raw_spin_unlock(&irq->irq_lock);
150                         vgic_put_irq(vcpu->kvm, irq);
151                 }
152         }
153 
154         raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
155 }
156 
157 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
158 {
159         WARN_ON(irq_set_irqchip_state(irq->host_irq,
160                                       IRQCHIP_STATE_PENDING,
161                                       pending));
162 }
163 
164 bool vgic_get_phys_line_level(struct vgic_irq *irq)
165 {
166         bool line_level;
167 
168         BUG_ON(!irq->hw);
169 
170         if (irq->ops && irq->ops->get_input_level)
171                 return irq->ops->get_input_level(irq->intid);
172 
173         WARN_ON(irq_get_irqchip_state(irq->host_irq,
174                                       IRQCHIP_STATE_PENDING,
175                                       &line_level));
176         return line_level;
177 }
178 
179 /* Set/Clear the physical active state */
180 void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
181 {
182 
183         BUG_ON(!irq->hw);
184         WARN_ON(irq_set_irqchip_state(irq->host_irq,
185                                       IRQCHIP_STATE_ACTIVE,
186                                       active));
187 }
188 
189 /**
190  * vgic_target_oracle - compute the target vcpu for an irq
191  *
192  * @irq:        The irq to route. Must be already locked.
193  *
194  * Based on the current state of the interrupt (enabled, pending,
195  * active, vcpu and target_vcpu), compute the next vcpu this should be
196  * given to. Return NULL if this shouldn't be injected at all.
197  *
198  * Requires the IRQ lock to be held.
199  */
200 static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
201 {
202         lockdep_assert_held(&irq->irq_lock);
203 
204         /* If the interrupt is active, it must stay on the current vcpu */
205         if (irq->active)
206                 return irq->vcpu ? : irq->target_vcpu;
207 
208         /*
209          * If the IRQ is not active but enabled and pending, we should direct
210          * it to its configured target VCPU.
211          * If the distributor is disabled, pending interrupts shouldn't be
212          * forwarded.
213          */
214         if (irq->enabled && irq_is_pending(irq)) {
215                 if (unlikely(irq->target_vcpu &&
216                              !irq->target_vcpu->kvm->arch.vgic.enabled))
217                         return NULL;
218 
219                 return irq->target_vcpu;
220         }
221 
222         /* If neither active nor pending and enabled, then this IRQ should not
223          * be queued to any VCPU.
224          */
225         return NULL;
226 }
227 
228 /*
229  * The order of items in the ap_lists defines how we'll pack things in LRs as
230  * well, the first items in the list being the first things populated in the
231  * LRs.
232  *
233  * A hard rule is that active interrupts can never be pushed out of the LRs
234  * (and therefore take priority) since we cannot reliably trap on deactivation
235  * of IRQs and therefore they have to be present in the LRs.
236  *
237  * Otherwise things should be sorted by the priority field and the GIC
238  * hardware support will take care of preemption of priority groups etc.
239  *
240  * Return negative if "a" sorts before "b", 0 to preserve order, and positive
241  * to sort "b" before "a".
242  */
243 static int vgic_irq_cmp(void *priv, const struct list_head *a,
244                         const struct list_head *b)
245 {
246         struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
247         struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
248         bool penda, pendb;
249         int ret;
250 
251         /*
252          * list_sort may call this function with the same element when
253          * the list is fairly long.
254          */
255         if (unlikely(irqa == irqb))
256                 return 0;
257 
258         raw_spin_lock(&irqa->irq_lock);
259         raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
260 
261         if (irqa->active || irqb->active) {
262                 ret = (int)irqb->active - (int)irqa->active;
263                 goto out;
264         }
265 
266         penda = irqa->enabled && irq_is_pending(irqa);
267         pendb = irqb->enabled && irq_is_pending(irqb);
268 
269         if (!penda || !pendb) {
270                 ret = (int)pendb - (int)penda;
271                 goto out;
272         }
273 
274         /* Both pending and enabled, sort by priority */
275         ret = irqa->priority - irqb->priority;
276 out:
277         raw_spin_unlock(&irqb->irq_lock);
278         raw_spin_unlock(&irqa->irq_lock);
279         return ret;
280 }
281 
282 /* Must be called with the ap_list_lock held */
283 static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
284 {
285         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
286 
287         lockdep_assert_held(&vgic_cpu->ap_list_lock);
288 
289         list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
290 }
291 
292 /*
293  * An injection is only valid if it changes the line level of a
294  * level-triggered IRQ or is a rising edge; IRQ lines connected to an
295  * in-kernel device can only be controlled by their owner.
296  */
297 static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
298 {
299         if (irq->owner != owner)
300                 return false;
301 
302         switch (irq->config) {
303         case VGIC_CONFIG_LEVEL:
304                 return irq->line_level != level;
305         case VGIC_CONFIG_EDGE:
306                 return level;
307         }
308 
309         return false;
310 }
311 
312 /*
313  * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
314  * Do the queuing if necessary, taking the right locks in the right order.
315  * Returns true when the IRQ was queued, false otherwise.
316  *
317  * Needs to be entered with the IRQ lock already held, but will return
318  * with all locks dropped.
319  */
320 bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
321                            unsigned long flags) __releases(&irq->irq_lock)
322 {
323         struct kvm_vcpu *vcpu;
324 
325         lockdep_assert_held(&irq->irq_lock);
326 
327 retry:
328         vcpu = vgic_target_oracle(irq);
329         if (irq->vcpu || !vcpu) {
330                 /*
331                  * If this IRQ is already on a VCPU's ap_list, then it
332                  * cannot be moved or modified and there is no more work for
333                  * us to do.
334                  *
335                  * Otherwise, if the irq is not pending and enabled, it does
336                  * not need to be inserted into an ap_list and there is also
337                  * no more work for us to do.
338                  */
339                 raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
340 
341                 /*
342                  * We have to kick the VCPU here, because we could be
343                  * queueing an edge-triggered interrupt for which we
344                  * get no EOI maintenance interrupt. In that case,
345                  * while the IRQ is already on the VCPU's AP list, the
346                  * VCPU could have EOI'ed the original interrupt and
347                  * won't see this one until it exits for some other
348                  * reason.
349                  */
350                 if (vcpu) {
351                         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
352                         kvm_vcpu_kick(vcpu);
353                 }
354                 return false;
355         }
356 
357         /*
358          * We must unlock the irq lock to take the ap_list_lock where
359          * we are going to insert this new pending interrupt.
360          */
361         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
362 
363         /* someone can do stuff here, which we re-check below */
364 
365         raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
366         raw_spin_lock(&irq->irq_lock);
367 
368         /*
369          * Did something change behind our backs?
370          *
371          * There are two cases:
372          * 1) The irq lost its pending state or was disabled behind our
373          *    backs and/or it was queued to another VCPU's ap_list.
374          * 2) Someone changed the affinity on this irq behind our
375          *    backs and we are now holding the wrong ap_list_lock.
376          *
377          * In both cases, drop the locks and retry.
378          */
379 
380         if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
381                 raw_spin_unlock(&irq->irq_lock);
382                 raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
383                                            flags);
384 
385                 raw_spin_lock_irqsave(&irq->irq_lock, flags);
386                 goto retry;
387         }
388 
389         /*
390          * Grab a reference to the irq to reflect the fact that it is
391          * now in the ap_list. This is safe as the caller must already hold a
392          * reference on the irq.
393          */
394         vgic_get_irq_kref(irq);
395         list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
396         irq->vcpu = vcpu;
397 
398         raw_spin_unlock(&irq->irq_lock);
399         raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
400 
401         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
402         kvm_vcpu_kick(vcpu);
403 
404         return true;
405 }
406 
407 /**
408  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
409  * @kvm:     The VM structure pointer
410  * @vcpu:    The CPU for PPIs or NULL for global interrupts
411  * @intid:   The INTID to inject a new state to.
412  * @level:   Edge-triggered:  true:  to trigger the interrupt
413  *                            false: to ignore the call
414  *           Level-sensitive: true:  raise the input signal
415  *                            false: lower the input signal
416  * @owner:   The opaque pointer to the owner of the IRQ being raised to verify
417  *           that the caller is allowed to inject this IRQ.  Userspace
418  *           injections will have owner == NULL.
419  *
420  * The VGIC is not concerned with devices being active-LOW or active-HIGH for
421  * level-sensitive interrupts.  You can think of the level parameter as 1
422  * being HIGH and 0 being LOW and all devices being active-HIGH.
423  */
424 int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
425                         unsigned int intid, bool level, void *owner)
426 {
427         struct vgic_irq *irq;
428         unsigned long flags;
429         int ret;
430 
431         ret = vgic_lazy_init(kvm);
432         if (ret)
433                 return ret;
434 
435         if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
436                 return -EINVAL;
437 
438         trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level);
439 
440         irq = vgic_get_irq(kvm, vcpu, intid);
441         if (!irq)
442                 return -EINVAL;
443 
444         raw_spin_lock_irqsave(&irq->irq_lock, flags);
445 
446         if (!vgic_validate_injection(irq, level, owner)) {
447                 /* Nothing to see here, move along... */
448                 raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
449                 vgic_put_irq(kvm, irq);
450                 return 0;
451         }
452 
453         if (irq->config == VGIC_CONFIG_LEVEL)
454                 irq->line_level = level;
455         else
456                 irq->pending_latch = true;
457 
458         vgic_queue_irq_unlock(kvm, irq, flags);
459         vgic_put_irq(kvm, irq);
460 
461         return 0;
462 }
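/*
 * Hedged usage sketch for the API documented above (helper name, device
 * model and SPI number are hypothetical): an in-kernel device asserting or
 * deasserting a level-sensitive SPI. SPIs are global, so no target vcpu is
 * passed; @owner must match the cookie registered for this line, or be
 * NULL for an unclaimed line.
 */
static __maybe_unused int vgic_example_set_spi_level(struct kvm *kvm,
                                                     unsigned int spi_intid,
                                                     bool asserted, void *owner)
{
        return kvm_vgic_inject_irq(kvm, NULL, spi_intid, asserted, owner);
}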
463 
464 /* @irq->irq_lock must be held */
465 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
466                             unsigned int host_irq,
467                             struct irq_ops *ops)
468 {
469         struct irq_desc *desc;
470         struct irq_data *data;
471 
472         /*
473          * Find the physical IRQ number corresponding to @host_irq
474          */
475         desc = irq_to_desc(host_irq);
476         if (!desc) {
477                 kvm_err("%s: no interrupt descriptor\n", __func__);
478                 return -EINVAL;
479         }
480         data = irq_desc_get_irq_data(desc);
481         while (data->parent_data)
482                 data = data->parent_data;
483 
484         irq->hw = true;
485         irq->host_irq = host_irq;
486         irq->hwintid = data->hwirq;
487         irq->ops = ops;
488         return 0;
489 }
490 
491 /* @irq->irq_lock must be held */
492 static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
493 {
494         irq->hw = false;
495         irq->hwintid = 0;
496         irq->ops = NULL;
497 }
498 
499 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
500                           u32 vintid, struct irq_ops *ops)
501 {
502         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
503         unsigned long flags;
504         int ret;
505 
506         BUG_ON(!irq);
507 
508         raw_spin_lock_irqsave(&irq->irq_lock, flags);
509         ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
510         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
511         vgic_put_irq(vcpu->kvm, irq);
512 
513         return ret;
514 }
515 
516 /**
517  * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
518  * @vcpu: The VCPU pointer
519  * @vintid: The INTID of the interrupt
520  *
521  * Reset the active and pending states of a mapped interrupt.  Kernel
522  * subsystems injecting mapped interrupts should reset their interrupt lines
523  * when we are doing a reset of the VM.
524  */
525 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
526 {
527         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
528         unsigned long flags;
529 
530         if (!irq->hw)
531                 goto out;
532 
533         raw_spin_lock_irqsave(&irq->irq_lock, flags);
534         irq->active = false;
535         irq->pending_latch = false;
536         irq->line_level = false;
537         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
538 out:
539         vgic_put_irq(vcpu->kvm, irq);
540 }
541 
542 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
543 {
544         struct vgic_irq *irq;
545         unsigned long flags;
546 
547         if (!vgic_initialized(vcpu->kvm))
548                 return -EAGAIN;
549 
550         irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
551         BUG_ON(!irq);
552 
553         raw_spin_lock_irqsave(&irq->irq_lock, flags);
554         kvm_vgic_unmap_irq(irq);
555         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
556         vgic_put_irq(vcpu->kvm, irq);
557 
558         return 0;
559 }
560 
561 int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid)
562 {
563         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
564         unsigned long flags;
565         int ret = -1;
566 
567         raw_spin_lock_irqsave(&irq->irq_lock, flags);
568         if (irq->hw)
569                 ret = irq->hwintid;
570         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
571 
572         vgic_put_irq(vcpu->kvm, irq);
573         return ret;
574 }
575 
576 /**
577  * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
578  *
579  * @vcpu:   Pointer to the VCPU (used for PPIs)
580  * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
581  * @owner:  Opaque pointer to the owner
582  *
583  * Returns 0 if intid is not already used by another in-kernel device and the
584  * owner is set, otherwise returns an error code.
585  */
586 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
587 {
588         struct vgic_irq *irq;
589         unsigned long flags;
590         int ret = 0;
591 
592         if (!vgic_initialized(vcpu->kvm))
593                 return -EAGAIN;
594 
595         /* SGIs and LPIs cannot be wired up to any device */
596         if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
597                 return -EINVAL;
598 
599         irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
600         raw_spin_lock_irqsave(&irq->irq_lock, flags);
601         if (irq->owner && irq->owner != owner)
602                 ret = -EEXIST;
603         else
604                 irq->owner = owner;
605         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
606 
607         return ret;
608 }
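/*
 * Hedged sketch tying kvm_vgic_set_owner() to kvm_vgic_inject_irq() (all
 * names below are hypothetical): an in-kernel user claims a PPI with an
 * owner cookie and must then pass the same cookie on every injection, as
 * enforced by vgic_validate_injection().
 */
static __maybe_unused int vgic_example_claim_and_raise_ppi(struct kvm_vcpu *vcpu,
                                                           unsigned int ppi_intid,
                                                           void *owner)
{
        int ret;

        ret = kvm_vgic_set_owner(vcpu, ppi_intid, owner);
        if (ret)
                return ret;

        /* Raise the input line; injection is rejected on an owner mismatch. */
        return kvm_vgic_inject_irq(vcpu->kvm, vcpu, ppi_intid, true, owner);
}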
609 
610 /**
611  * vgic_prune_ap_list - Remove non-relevant interrupts from the list
612  *
613  * @vcpu: The VCPU pointer
614  *
615  * Go over the list of "interesting" interrupts, and prune those that we
616  * won't have to consider in the near future.
617  */
618 static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
619 {
620         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
621         struct vgic_irq *irq, *tmp;
622 
623         DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
624 
625 retry:
626         raw_spin_lock(&vgic_cpu->ap_list_lock);
627 
628         list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
629                 struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
630                 bool target_vcpu_needs_kick = false;
631 
632                 raw_spin_lock(&irq->irq_lock);
633 
634                 BUG_ON(vcpu != irq->vcpu);
635 
636                 target_vcpu = vgic_target_oracle(irq);
637 
638                 if (!target_vcpu) {
639                         /*
640                          * We don't need to process this interrupt any
641                          * further, move it off the list.
642                          */
643                         list_del(&irq->ap_list);
644                         irq->vcpu = NULL;
645                         raw_spin_unlock(&irq->irq_lock);
646 
647                         /*
648                          * This vgic_put_irq call matches the
649                          * vgic_get_irq_kref in vgic_queue_irq_unlock,
650                          * where we added the LPI to the ap_list. As
651                          * we remove the irq from the list, we also
652                          * drop the refcount.
653                          */
654                         vgic_put_irq(vcpu->kvm, irq);
655                         continue;
656                 }
657 
658                 if (target_vcpu == vcpu) {
659                         /* We're on the right CPU */
660                         raw_spin_unlock(&irq->irq_lock);
661                         continue;
662                 }
663 
664                 /* This interrupt looks like it has to be migrated. */
665 
666                 raw_spin_unlock(&irq->irq_lock);
667                 raw_spin_unlock(&vgic_cpu->ap_list_lock);
668 
669                 /*
670                  * Ensure locking order by always locking the smallest
671                  * ID first.
672                  */
673                 if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
674                         vcpuA = vcpu;
675                         vcpuB = target_vcpu;
676                 } else {
677                         vcpuA = target_vcpu;
678                         vcpuB = vcpu;
679                 }
680 
681                 raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
682                 raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
683                                       SINGLE_DEPTH_NESTING);
684                 raw_spin_lock(&irq->irq_lock);
685 
686                 /*
687                  * If the affinity has been preserved, move the
688                  * interrupt around. Otherwise, it means things have
689                  * changed while the interrupt was unlocked, and we
690                  * need to replay this.
691                  *
692                  * In all cases, we cannot trust the list not to have
693                  * changed, so we restart from the beginning.
694                  */
695                 if (target_vcpu == vgic_target_oracle(irq)) {
696                         struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
697 
698                         list_del(&irq->ap_list);
699                         irq->vcpu = target_vcpu;
700                         list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
701                         target_vcpu_needs_kick = true;
702                 }
703 
704                 raw_spin_unlock(&irq->irq_lock);
705                 raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
706                 raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
707 
708                 if (target_vcpu_needs_kick) {
709                         kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
710                         kvm_vcpu_kick(target_vcpu);
711                 }
712 
713                 goto retry;
714         }
715 
716         raw_spin_unlock(&vgic_cpu->ap_list_lock);
717 }
718 
719 static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
720 {
721         if (kvm_vgic_global_state.type == VGIC_V2)
722                 vgic_v2_fold_lr_state(vcpu);
723         else
724                 vgic_v3_fold_lr_state(vcpu);
725 }
726 
727 /* Requires the irq_lock to be held. */
728 static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
729                                     struct vgic_irq *irq, int lr)
730 {
731         lockdep_assert_held(&irq->irq_lock);
732 
733         if (kvm_vgic_global_state.type == VGIC_V2)
734                 vgic_v2_populate_lr(vcpu, irq, lr);
735         else
736                 vgic_v3_populate_lr(vcpu, irq, lr);
737 }
738 
739 static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
740 {
741         if (kvm_vgic_global_state.type == VGIC_V2)
742                 vgic_v2_clear_lr(vcpu, lr);
743         else
744                 vgic_v3_clear_lr(vcpu, lr);
745 }
746 
747 static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
748 {
749         if (kvm_vgic_global_state.type == VGIC_V2)
750                 vgic_v2_set_underflow(vcpu);
751         else
752                 vgic_v3_set_underflow(vcpu);
753 }
754 
755 /* Requires the ap_list_lock to be held. */
756 static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
757                                  bool *multi_sgi)
758 {
759         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
760         struct vgic_irq *irq;
761         int count = 0;
762 
763         *multi_sgi = false;
764 
765         lockdep_assert_held(&vgic_cpu->ap_list_lock);
766 
767         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
768                 int w;
769 
770                 raw_spin_lock(&irq->irq_lock);
771                 /* GICv2 SGIs can count for more than one... */
772                 w = vgic_irq_get_lr_count(irq);
773                 raw_spin_unlock(&irq->irq_lock);
774 
775                 count += w;
776                 *multi_sgi |= (w > 1);
777         }
778         return count;
779 }
780 
781 /* Requires the VCPU's ap_list_lock to be held. */
782 static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
783 {
784         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
785         struct vgic_irq *irq;
786         int count;
787         bool multi_sgi;
788         u8 prio = 0xff;
789         int i = 0;
790 
791         lockdep_assert_held(&vgic_cpu->ap_list_lock);
792 
793         count = compute_ap_list_depth(vcpu, &multi_sgi);
794         if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
795                 vgic_sort_ap_list(vcpu);
796 
797         count = 0;
798 
799         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
800                 raw_spin_lock(&irq->irq_lock);
801 
802                 /*
803                  * If we have multi-SGIs in the pipeline, we need to
804                  * guarantee that they are all seen before any IRQ of
805                  * lower priority. In that case, we need to filter out
806                  * these interrupts by exiting early. This is easy as
807                  * the AP list has been sorted already.
808                  */
809                 if (multi_sgi && irq->priority > prio) {
810                         raw_spin_unlock(&irq->irq_lock);
811                         break;
812                 }
813 
814                 if (likely(vgic_target_oracle(irq) == vcpu)) {
815                         vgic_populate_lr(vcpu, irq, count++);
816 
817                         if (irq->source)
818                                 prio = irq->priority;
819                 }
820 
821                 raw_spin_unlock(&irq->irq_lock);
822 
823                 if (count == kvm_vgic_global_state.nr_lr) {
824                         if (!list_is_last(&irq->ap_list,
825                                           &vgic_cpu->ap_list_head))
826                                 vgic_set_underflow(vcpu);
827                         break;
828                 }
829         }
830 
831         /* Nuke remaining LRs */
832         for (i = count ; i < kvm_vgic_global_state.nr_lr; i++)
833                 vgic_clear_lr(vcpu, i);
834 
835         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
836                 vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
837         else
838                 vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
839 }
840 
841 static inline bool can_access_vgic_from_kernel(void)
842 {
843         /*
844          * GICv2 can always be accessed from the kernel because it is
845          * memory-mapped, and VHE systems can access GICv3 EL2 system
846          * registers.
847          */
848         return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
849 }
850 
851 static inline void vgic_save_state(struct kvm_vcpu *vcpu)
852 {
853         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
854                 vgic_v2_save_state(vcpu);
855         else
856                 __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
857 }
858 
859 /* Sync back the hardware VGIC state into our emulation after a guest's run. */
860 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
861 {
862         int used_lrs;
863 
864         /* An empty ap_list_head implies used_lrs == 0 */
865         if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
866                 return;
867 
868         if (can_access_vgic_from_kernel())
869                 vgic_save_state(vcpu);
870 
871         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
872                 used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
873         else
874                 used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
875 
876         if (used_lrs)
877                 vgic_fold_lr_state(vcpu);
878         vgic_prune_ap_list(vcpu);
879 }
880 
881 static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
882 {
883         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
884                 vgic_v2_restore_state(vcpu);
885         else
886                 __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
887 }
888 
889 /* Flush our emulation state into the GIC hardware before entering the guest. */
890 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
891 {
892         /*
893          * If there are no virtual interrupts active or pending for this
894          * VCPU, then there is no work to do and we can bail out without
895          * taking any lock.  There is a potential race with someone injecting
896          * interrupts to the VCPU, but it is a benign race as the VCPU will
897          * either observe the new interrupt before or after doing this check,
898          * and introducing an additional synchronization mechanism doesn't change
899          * this.
900          *
901          * Note that we still need to go through the whole thing if anything
902          * can be directly injected (GICv4).
903          */
904         if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
905             !vgic_supports_direct_msis(vcpu->kvm))
906                 return;
907 
908         DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
909 
910         if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
911                 raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
912                 vgic_flush_lr_state(vcpu);
913                 raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
914         }
915 
916         if (can_access_vgic_from_kernel())
917                 vgic_restore_state(vcpu);
918 
919         if (vgic_supports_direct_msis(vcpu->kvm))
920                 vgic_v4_commit(vcpu);
921 }
922 
923 void kvm_vgic_load(struct kvm_vcpu *vcpu)
924 {
925         if (unlikely(!vgic_initialized(vcpu->kvm)))
926                 return;
927 
928         if (kvm_vgic_global_state.type == VGIC_V2)
929                 vgic_v2_load(vcpu);
930         else
931                 vgic_v3_load(vcpu);
932 }
933 
934 void kvm_vgic_put(struct kvm_vcpu *vcpu)
935 {
936         if (unlikely(!vgic_initialized(vcpu->kvm)))
937                 return;
938 
939         if (kvm_vgic_global_state.type == VGIC_V2)
940                 vgic_v2_put(vcpu);
941         else
942                 vgic_v3_put(vcpu);
943 }
944 
945 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
946 {
947         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
948         struct vgic_irq *irq;
949         bool pending = false;
950         unsigned long flags;
951         struct vgic_vmcr vmcr;
952 
953         if (!vcpu->kvm->arch.vgic.enabled)
954                 return false;
955 
956         if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
957                 return true;
958 
959         vgic_get_vmcr(vcpu, &vmcr);
960 
961         raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
962 
963         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
964                 raw_spin_lock(&irq->irq_lock);
965                 pending = irq_is_pending(irq) && irq->enabled &&
966                           !irq->active &&
967                           irq->priority < vmcr.pmr;
968                 raw_spin_unlock(&irq->irq_lock);
969 
970                 if (pending)
971                         break;
972         }
973 
974         raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
975 
976         return pending;
977 }
978 
979 void vgic_kick_vcpus(struct kvm *kvm)
980 {
981         struct kvm_vcpu *vcpu;
982         unsigned long c;
983 
984         /*
985          * We've injected an interrupt, time to find out who deserves
986          * a good kick...
987          */
988         kvm_for_each_vcpu(c, vcpu, kvm) {
989                 if (kvm_vgic_vcpu_pending_irq(vcpu)) {
990                         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
991                         kvm_vcpu_kick(vcpu);
992                 }
993         }
994 }
995 
996 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
997 {
998         struct vgic_irq *irq;
999         bool map_is_active;
1000         unsigned long flags;
1001 
1002         if (!vgic_initialized(vcpu->kvm))
1003                 return false;
1004 
1005         irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
1006         raw_spin_lock_irqsave(&irq->irq_lock, flags);
1007         map_is_active = irq->hw && irq->active;
1008         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
1009         vgic_put_irq(vcpu->kvm, irq);
1010 
1011         return map_is_active;
1012 }
1013 
1014 /*
1015  * Level-triggered mapped IRQs are special because we only observe rising
1016  * edges as input to the VGIC.
1017  *
1018  * If the guest never acked the interrupt we have to sample the physical
1019  * line and set the line level, because the device state could have changed
1020  * or we simply need to process the still pending interrupt later.
1021  *
1022  * We could also have entered the guest with the interrupt active+pending.
1023  * On the next exit, we need to re-evaluate the pending state, as it could
1024  * otherwise result in a spurious interrupt by injecting a now potentially
1025  * stale pending state.
1026  *
1027  * If this causes us to lower the level, we have to also clear the physical
1028  * active state, since we will otherwise never be told when the interrupt
1029  * becomes asserted again.
1030  *
1031  * Another case is when the interrupt requires a helping hand on
1032  * deactivation (no HW deactivation, for example).
1033  */
1034 void vgic_irq_handle_resampling(struct vgic_irq *irq,
1035                                 bool lr_deactivated, bool lr_pending)
1036 {
1037         if (vgic_irq_is_mapped_level(irq)) {
1038                 bool resample = false;
1039 
1040                 if (unlikely(vgic_irq_needs_resampling(irq))) {
1041                         resample = !(irq->active || irq->pending_latch);
1042                 } else if (lr_pending || (lr_deactivated && irq->line_level)) {
1043                         irq->line_level = vgic_get_phys_line_level(irq);
1044                         resample = !irq->line_level;
1045                 }
1046 
1047                 if (resample)
1048                         vgic_irq_set_phys_active(irq, false);
1049         }
1050 }
1051 
