[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] x86/IRQ: make internally used IRQs also honor the pending EOI stack



At the time the pending EOI stack was introduced there were no
internally used IRQs which would have the LAPIC EOI issued from the
->end() hook. This had then changed with the introduction of IOMMUs,
but the interaction issue was presumably masked by
irq_guest_eoi_timer_fn() frequently EOI-ing interrupts way too early
(which got fixed by 359cf6f8a0ec ["x86/IRQ: don't keep EOI timer
running without need"]).

The problem is that with us re-enabling interrupts across handler
invocation, a higher priority (guest) interrupt may trigger while
handling a lower priority (internal) one. The EOI issued from
->end() (for ACKTYPE_EOI kind interrupts) would then mistakenly
EOI the higher priority (guest) interrupt, breaking (among other
things) pending EOI stack logic's assumptions.

Notes:

- In principle we could get away without the check_eoi_deferral flag.
  I've introduced it just to make sure there's as little change as
  possible to unaffected paths.
- Similarly the cpu_has_pending_apic_eoi() check in do_IRQ() isn't
  strictly necessary.
- The new function's name isn't very helpful with its use in
  end_level_ioapic_irq_new(). I did also consider eoi_APIC_irq() (to
  parallel ack_APIC_irq()), but then liked this even less.
- Other than I first thought serial console IRQs shouldn't be
  affected, as we run them on specifically allocated high priority
  vectors.

Reported-by: Igor Druzhinin <igor.druzhinin@xxxxxxxxxx>
Diagnosed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -1734,7 +1734,7 @@ static void end_level_ioapic_irq_new(str
 
     v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 
-    ack_APIC_irq();
+    end_nonmaskable_irq(desc, vector);
 
     if ( (desc->status & IRQ_MOVE_PENDING) &&
          !io_apic_level_ack_pending(desc->irq) )
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -438,6 +438,7 @@ int __init init_irq_data(void)
 }
 
 static void __do_IRQ_guest(int vector);
+static void flush_ready_eoi(void);
 
 static void ack_none(struct irq_desc *desc)
 {
@@ -865,6 +866,7 @@ void pirq_set_affinity(struct domain *d,
 }
 
 DEFINE_PER_CPU(unsigned int, irq_count);
+static DEFINE_PER_CPU(bool, check_eoi_deferral);
 
 uint8_t alloc_hipriority_vector(void)
 {
@@ -1008,7 +1010,25 @@ void do_IRQ(struct cpu_user_regs *regs)
 
  out:
     if ( desc->handler->end )
+    {
+        /*
+         * If higher priority vectors still have their EOIs pending, we may
+         * not issue an EOI here, as this would EOI the highest priority one.
+         */
+        if ( cpu_has_pending_apic_eoi() )
+        {
+            this_cpu(check_eoi_deferral) = true;
+            desc->handler->end(desc, vector);
+            this_cpu(check_eoi_deferral) = false;
+
+            spin_unlock(&desc->lock);
+            flush_ready_eoi();
+            goto out_no_unlock;
+        }
+
         desc->handler->end(desc, vector);
+    }
+
  out_no_end:
     spin_unlock(&desc->lock);
  out_no_unlock:
@@ -1163,6 +1183,29 @@ bool cpu_has_pending_apic_eoi(void)
     return pending_eoi_sp(this_cpu(pending_eoi)) != 0;
 }
 
+void end_nonmaskable_irq(struct irq_desc *desc, uint8_t vector)
+{
+    struct pending_eoi *peoi = this_cpu(pending_eoi);
+    unsigned int sp = pending_eoi_sp(peoi);
+
+    if ( !this_cpu(check_eoi_deferral) || !sp || peoi[sp - 1].vector < vector )
+    {
+        ack_APIC_irq();
+        return;
+    }
+
+    /* Defer this vector's EOI until all higher ones have been EOI-ed. */
+    pending_eoi_sp(peoi) = sp + 1;
+    do {
+        peoi[sp] = peoi[sp - 1];
+    } while ( --sp && peoi[sp - 1].vector > vector );
+    ASSERT(!sp || peoi[sp - 1].vector < vector);
+
+    peoi[sp].irq = desc->irq;
+    peoi[sp].vector = vector;
+    peoi[sp].ready = 1;
+}
+
 static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
 {
     if ( d->arch.pirq_eoi_map )
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -512,11 +512,6 @@ static void ack_maskable_msi_irq(struct
     ack_APIC_irq(); /* ACKTYPE_NONE */
 }
 
-void end_nonmaskable_msi_irq(struct irq_desc *desc, u8 vector)
-{
-    ack_APIC_irq(); /* ACKTYPE_EOI */
-}
-
 /*
  * IRQ chip for MSI PCI/PCI-X/PCI-Express devices,
  * which implement the MSI or MSI-X capability structure.
@@ -539,7 +534,7 @@ static hw_irq_controller pci_msi_nonmask
     .enable       = irq_enable_none,
     .disable      = irq_disable_none,
     .ack          = ack_nonmaskable_msi_irq,
-    .end          = end_nonmaskable_msi_irq,
+    .end          = end_nonmaskable_irq,
     .set_affinity = set_msi_affinity
 };
 
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -427,7 +427,7 @@ static unsigned int iommu_msi_startup(st
 static void iommu_msi_end(struct irq_desc *desc, u8 vector)
 {
     iommu_msi_unmask(desc);
-    ack_APIC_irq();
+    end_nonmaskable_irq(desc, vector);
 }
 
 
@@ -460,7 +460,7 @@ static void iommu_maskable_msi_shutdown(
  * maskable flavors here, as we want the ACK to be issued in ->end().
  */
 #define iommu_maskable_msi_ack ack_nonmaskable_msi_irq
-#define iommu_maskable_msi_end end_nonmaskable_msi_irq
+#define iommu_maskable_msi_end end_nonmaskable_irq
 
 static hw_irq_controller iommu_maskable_msi_type = {
     .typename = "IOMMU-M-MSI",
@@ -507,7 +507,7 @@ static hw_irq_controller iommu_x2apic_ty
     .enable       = irq_enable_none,
     .disable      = irq_disable_none,
     .ack          = ack_nonmaskable_msi_irq,
-    .end          = end_nonmaskable_msi_irq,
+    .end          = end_nonmaskable_irq,
     .set_affinity = set_x2apic_affinity,
 };
 
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1042,7 +1042,7 @@ static void dma_msi_ack(struct irq_desc
 static void dma_msi_end(struct irq_desc *desc, u8 vector)
 {
     dma_msi_unmask(desc);
-    ack_APIC_irq();
+    end_nonmaskable_irq(desc, vector);
 }
 
 static void dma_msi_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -188,6 +188,7 @@ void move_masked_irq(struct irq_desc *);
 
 int bind_irq_vector(int irq, int vector, const cpumask_t *);
 
+void end_nonmaskable_irq(struct irq_desc *, uint8_t vector);
 void irq_set_affinity(struct irq_desc *, const cpumask_t *mask);
 
 int init_domain_irq_mapping(struct domain *);
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -250,7 +250,6 @@ void mask_msi_irq(struct irq_desc *);
 void unmask_msi_irq(struct irq_desc *);
 void guest_mask_msi_irq(struct irq_desc *, bool mask);
 void ack_nonmaskable_msi_irq(struct irq_desc *);
-void end_nonmaskable_msi_irq(struct irq_desc *, u8 vector);
 void set_msi_affinity(struct irq_desc *, const cpumask_t *);
 
 #endif /* __ASM_MSI_H */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.