[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] x86: IRQ Migration logic enhancement.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1256564018 0
# Node ID 42e9682e4b6143462d8ebb8ba5bdd46144f90faf
# Parent  f0be14bb3f7a4d9e9d865b3a4c1433a25051ee87
x86: IRQ Migration logic enhancement.

To program an MSI's address/vector pair safely, delay the IRQ
migration operation until just before acking the next interrupt.
This avoids inconsistent interrupt generation caused by the
non-atomic writing of the MSI address and data registers.

Port the logic from Linux and tailor it for Xen.

Signed-off-by: Xiantao Zhang <xiantao.zhang@xxxxxxxxx>
---
 xen/arch/x86/hpet.c           |    1 
 xen/arch/x86/hvm/hvm.c        |    2 -
 xen/arch/x86/io_apic.c        |   42 ++++++++++++++++++++++++++++
 xen/arch/x86/irq.c            |   61 ++++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/msi.c            |    4 --
 xen/include/asm-x86/io_apic.h |    9 ++++++
 xen/include/asm-x86/irq.h     |    6 ++++
 xen/include/xen/irq.h         |    2 +
 8 files changed, 122 insertions(+), 5 deletions(-)

diff -r f0be14bb3f7a -r 42e9682e4b61 xen/arch/x86/hpet.c
--- a/xen/arch/x86/hpet.c       Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/arch/x86/hpet.c       Mon Oct 26 13:33:38 2009 +0000
@@ -289,6 +289,7 @@ static void hpet_msi_ack(unsigned int ir
     struct irq_desc *desc = irq_to_desc(irq);
 
     irq_complete_move(&desc);
+    move_native_irq(irq);
     ack_APIC_irq();
 }
 
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/arch/x86/hvm/hvm.c    Mon Oct 26 13:33:38 2009 +0000
@@ -243,7 +243,7 @@ void hvm_migrate_pirqs(struct vcpu *v)
             continue;
         irq = desc - irq_desc;
         ASSERT(MSI_IRQ(irq));
-        desc->handler->set_affinity(irq, *cpumask_of(v->processor));
+        irq_set_affinity(irq, *cpumask_of(v->processor));
         spin_unlock_irq(&desc->lock);
     }
     spin_unlock(&d->event_lock);
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/arch/x86/io_apic.c    Mon Oct 26 13:33:38 2009 +0000
@@ -1379,6 +1379,7 @@ static void ack_edge_ioapic_irq(unsigned
     struct irq_desc *desc = irq_to_desc(irq);
     
     irq_complete_move(&desc);
+    move_native_irq(irq);
 
     if ((desc->status & (IRQ_PENDING | IRQ_DISABLED))
         == (IRQ_PENDING | IRQ_DISABLED))
@@ -1418,6 +1419,38 @@ static void setup_ioapic_ack(char *s)
         printk("Unknown ioapic_ack value specified: '%s'\n", s);
 }
 custom_param("ioapic_ack", setup_ioapic_ack);
+
+static bool_t io_apic_level_ack_pending(unsigned int irq)
+{
+    struct irq_pin_list *entry;
+    unsigned long flags;
+
+    spin_lock_irqsave(&ioapic_lock, flags);
+    entry = &irq_2_pin[irq];
+    for (;;) {
+        unsigned int reg;
+        int pin;
+
+        if (!entry)
+            break;
+
+        pin = entry->pin;
+        if (pin == -1)
+            continue;
+        reg = io_apic_read(entry->apic, 0x10 + pin*2);
+        /* Is the remote IRR bit set? */
+        if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+            spin_unlock_irqrestore(&ioapic_lock, flags);
+            return 1;
+        }
+        if (!entry->next)
+            break;
+        entry = irq_2_pin + entry->next;
+    }
+    spin_unlock_irqrestore(&ioapic_lock, flags);
+
+    return 0;
+}
 
 static void mask_and_ack_level_ioapic_irq (unsigned int irq)
 {
@@ -1456,6 +1489,10 @@ static void mask_and_ack_level_ioapic_ir
     v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 
     ack_APIC_irq();
+    
+    if ((irq_desc[irq].status & IRQ_MOVE_PENDING) &&
+       !io_apic_level_ack_pending(irq))
+        move_native_irq(irq);
 
     if (!(v & (1 << (i & 0x1f)))) {
         atomic_inc(&irq_mis_count);
@@ -1503,6 +1540,10 @@ static void end_level_ioapic_irq (unsign
 
     ack_APIC_irq();
 
+    if ((irq_desc[irq].status & IRQ_MOVE_PENDING) &&
+            !io_apic_level_ack_pending(irq))
+        move_native_irq(irq);
+
     if (!(v & (1 << (i & 0x1f)))) {
         atomic_inc(&irq_mis_count);
         spin_lock(&ioapic_lock);
@@ -1564,6 +1605,7 @@ static void ack_msi_irq(unsigned int irq
     struct irq_desc *desc = irq_to_desc(irq);
 
     irq_complete_move(&desc);
+    move_native_irq(irq);
 
     if ( msi_maskable_irq(desc->msi_desc) )
         ack_APIC_irq(); /* ACKTYPE_NONE */
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/arch/x86/irq.c        Mon Oct 26 13:33:38 2009 +0000
@@ -453,6 +453,67 @@ void __setup_vector_irq(int cpu)
         vector = irq_to_vector(irq);
         per_cpu(vector_irq, cpu)[vector] = irq;
     }
+}
+
+void move_masked_irq(int irq)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+               return;
+    
+    desc->status &= ~IRQ_MOVE_PENDING;
+
+    if (unlikely(cpus_empty(desc->pending_mask)))
+        return;
+
+    if (!desc->handler->set_affinity)
+        return;
+
+       /*
+        * If there was a valid mask to work with, please
+        * do the disable, re-program, enable sequence.
+        * This is *not* particularly important for level triggered
+        * but in an edge trigger case, we might be setting rte
+        * when an active trigger is coming in. This could
+        * cause some ioapics to malfunction.
+        * Being paranoid, I guess!
+        *
+        * For correct operation this depends on the caller
+        * masking the irqs.
+        */
+    if (likely(cpus_intersects(desc->pending_mask, cpu_online_map)))
+        desc->handler->set_affinity(irq, desc->pending_mask);
+
+       cpus_clear(desc->pending_mask);
+}
+
+void move_native_irq(int irq)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+        return;
+
+    if (unlikely(desc->status & IRQ_DISABLED))
+        return;
+
+    desc->handler->disable(irq);
+    move_masked_irq(irq);
+    desc->handler->enable(irq);
+}
+
+/* For re-setting interrupt affinity for a specific irq */
+void irq_set_affinity(int irq, cpumask_t mask)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+    
+    if (!desc->handler->set_affinity)
+        return;
+    
+    ASSERT(spin_is_locked(&desc->lock));
+    desc->status |= IRQ_MOVE_PENDING;
+    cpus_copy(desc->pending_mask, mask);
 }
 
 asmlinkage void do_IRQ(struct cpu_user_regs *regs)
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/arch/x86/msi.c        Mon Oct 26 13:33:38 2009 +0000
@@ -231,7 +231,6 @@ static void write_msi_msg(struct msi_des
         u8 slot = PCI_SLOT(dev->devfn);
         u8 func = PCI_FUNC(dev->devfn);
 
-        mask_msi_irq(entry->irq);
         pci_conf_write32(bus, slot, func, msi_lower_address_reg(pos),
                          msg->address_lo);
         if ( entry->msi_attrib.is_64 )
@@ -244,7 +243,6 @@ static void write_msi_msg(struct msi_des
         else
             pci_conf_write16(bus, slot, func, msi_data_reg(pos, 0),
                              msg->data);
-        unmask_msi_irq(entry->irq);
         break;
     }
     case PCI_CAP_ID_MSIX:
@@ -252,13 +250,11 @@ static void write_msi_msg(struct msi_des
         void __iomem *base;
         base = entry->mask_base;
 
-        mask_msi_irq(entry->irq);
         writel(msg->address_lo,
                base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
         writel(msg->address_hi,
                base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
         writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
-        unmask_msi_irq(entry->irq);
         break;
     }
     default:
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/include/asm-x86/io_apic.h
--- a/xen/include/asm-x86/io_apic.h     Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/include/asm-x86/io_apic.h     Mon Oct 26 13:33:38 2009 +0000
@@ -21,6 +21,15 @@
                + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
 
 #define IO_APIC_ID(idx) (mp_ioapics[idx].mpc_apicid)
+
+/* I/O Unit Redirection Table */
+#define IO_APIC_REDIR_VECTOR_MASK   0x000FF
+#define IO_APIC_REDIR_DEST_LOGICAL  0x00800
+#define IO_APIC_REDIR_DEST_PHYSICAL 0x00000
+#define IO_APIC_REDIR_SEND_PENDING  (1 << 12)
+#define IO_APIC_REDIR_REMOTE_IRR    (1 << 14)
+#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15)
+#define IO_APIC_REDIR_MASKED        (1 << 16)
 
 /*
  * The structure of the IO-APIC:
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/include/asm-x86/irq.h Mon Oct 26 13:33:38 2009 +0000
@@ -138,6 +138,12 @@ int __assign_irq_vector(int irq, struct 
 
 int bind_irq_vector(int irq, int vector, cpumask_t domain);
 
+void move_native_irq(int irq);
+
+void move_masked_irq(int irq);
+
+void irq_set_affinity(int irq, cpumask_t mask);
+
 #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
 #define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
 
diff -r f0be14bb3f7a -r 42e9682e4b61 xen/include/xen/irq.h
--- a/xen/include/xen/irq.h     Mon Oct 26 13:26:43 2009 +0000
+++ b/xen/include/xen/irq.h     Mon Oct 26 13:33:38 2009 +0000
@@ -24,6 +24,7 @@ struct irqaction {
 #define IRQ_REPLAY     8       /* IRQ has been replayed but not acked yet */
 #define IRQ_GUEST       16      /* IRQ is handled by guest OS(es) */
 #define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
+#define IRQ_MOVE_PENDING      64  /* IRQ is migrating to another CPU */
 #define IRQ_PER_CPU     256     /* IRQ is per CPU */
 
 /* Special IRQ numbers. */
@@ -75,6 +76,7 @@ typedef struct irq_desc {
     int irq;
     spinlock_t lock;
     cpumask_t affinity;
+    cpumask_t pending_mask;  /* IRQ migration pending mask */
 
     /* irq ratelimit */
     s_time_t rl_quantum_start;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.