[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Fix Xen's interrupt acknowledgement routines on certain



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID bb0dc0ae23bb1fe49c197f38951fc424eef2905e
# Parent  2ccaa3879417ba40b112fcf9d1ef4d45c82e25ca
Fix Xen's interrupt acknowledgement routines on certain
(apparently broken) IO-APIC hardware:
 1. Do not mask/unmask the IO-APIC pin during normal ISR
    processing. This seems to have really bizarre side effects
    on some chipsets.
 2. Since we instead tickle the local APIC in the ->end
    irq hook function, it *must* run on the CPU that
    received the interrupt. Therefore we track which CPUs
    need to do final acknowledgement and IPI them if
    necessary to do so.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>

diff -r 2ccaa3879417 -r bb0dc0ae23bb xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Fri Apr 14 10:58:49 2006
+++ b/xen/arch/x86/io_apic.c    Fri Apr 14 11:01:15 2006
@@ -190,16 +190,16 @@
     __modify_IO_APIC_irq(irq, 0, 0x00010000);
 }
 
-/* trigger = 0 */
-static void __edge_IO_APIC_irq (unsigned int irq)
-{
-    __modify_IO_APIC_irq(irq, 0, 0x00008000);
-}
-
-/* trigger = 1 */
-static void __level_IO_APIC_irq (unsigned int irq)
-{
-    __modify_IO_APIC_irq(irq, 0x00008000, 0);
+/* mask = 1, trigger = 0 */
+static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
+{
+    __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
+}
+
+/* mask = 0, trigger = 1 */
+static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
+{
+    __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
 }
 
 static void mask_IO_APIC_irq (unsigned int irq)
@@ -1323,10 +1323,13 @@
 
 static void mask_and_ack_level_ioapic_irq (unsigned int irq)
 {
+}
+
+static void end_level_ioapic_irq (unsigned int irq)
+{
     unsigned long v;
     int i;
 
-    mask_IO_APIC_irq(irq);
 /*
  * It appears there is an erratum which affects at least version 0x11
  * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -1355,15 +1358,10 @@
     if (!(v & (1 << (i & 0x1f)))) {
         atomic_inc(&irq_mis_count);
         spin_lock(&ioapic_lock);
-        __edge_IO_APIC_irq(irq);
-        __level_IO_APIC_irq(irq);
+        __mask_and_edge_IO_APIC_irq(irq);
+        __unmask_and_level_IO_APIC_irq(irq);
         spin_unlock(&ioapic_lock);
     }
-}
-
-static void end_level_ioapic_irq (unsigned int irq)
-{
-    unmask_IO_APIC_irq(irq);
 }
 
 static unsigned int startup_edge_ioapic_vector(unsigned int vector)
diff -r 2ccaa3879417 -r bb0dc0ae23bb xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Fri Apr 14 10:58:49 2006
+++ b/xen/arch/x86/irq.c        Fri Apr 14 11:01:15 2006
@@ -148,6 +148,11 @@
     u8 nr_guests;
     u8 in_flight;
     u8 shareable;
+    u8 ack_type;
+#define ACKTYPE_NONE   0 /* Final ACK is not required */
+#define ACKTYPE_SINGLE 1 /* Final ACK on any CPU */
+#define ACKTYPE_MULTI  2 /* Final ACK on the CPU that was interrupted */
+    cpumask_t cpu_ack_map;
     struct domain *guest[IRQ_MAX_GUESTS];
 } irq_guest_action_t;
 
@@ -159,35 +164,109 @@
     struct domain      *d;
     int                 i;
 
+    if ( unlikely(action->nr_guests == 0) )
+    {
+        /* An interrupt may slip through while freeing an ACKTYPE_MULTI irq. */
+        ASSERT(action->ack_type == ACKTYPE_MULTI);
+        desc->handler->end(vector);
+        return;
+    }
+
+    if ( action->ack_type == ACKTYPE_MULTI )
+        cpu_set(smp_processor_id(), action->cpu_ack_map);
+
     for ( i = 0; i < action->nr_guests; i++ )
     {
         d = action->guest[i];
-        if ( !test_and_set_bit(irq, &d->pirq_mask) )
+        if ( (action->ack_type != ACKTYPE_NONE) &&
+             !test_and_set_bit(irq, &d->pirq_mask) )
             action->in_flight++;
         send_guest_pirq(d, irq);
     }
 }
 
+static void end_guest_irq(void *data)
+{
+    irq_desc_t         *desc = data;
+    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+    unsigned long       flags;
+
+    spin_lock_irqsave(&desc->lock, flags);
+    if ( (desc->status & IRQ_GUEST) &&
+         (action->in_flight == 0) &&
+         test_and_clear_bit(smp_processor_id(), &action->cpu_ack_map) )
+        desc->handler->end(desc - irq_desc);
+    spin_unlock_irqrestore(&desc->lock, flags);    
+}
+
 int pirq_guest_unmask(struct domain *d)
 {
-    irq_desc_t    *desc;
-    unsigned int   pirq;
-    shared_info_t *s = d->shared_info;
+    irq_desc_t         *desc;
+    irq_guest_action_t *action;
+    cpumask_t           cpu_ack_map = CPU_MASK_NONE;
+    unsigned int        pirq, cpu = smp_processor_id();
+    shared_info_t      *s = d->shared_info;
 
     for ( pirq = find_first_bit(d->pirq_mask, NR_PIRQS);
           pirq < NR_PIRQS;
           pirq = find_next_bit(d->pirq_mask, NR_PIRQS, pirq+1) )
     {
-        desc = &irq_desc[irq_to_vector(pirq)];
+        desc   = &irq_desc[irq_to_vector(pirq)];
+        action = (irq_guest_action_t *)desc->action;
+
         spin_lock_irq(&desc->lock);
         if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
-             test_and_clear_bit(pirq, &d->pirq_mask) &&
-             (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
-            desc->handler->end(irq_to_vector(pirq));
+             test_and_clear_bit(pirq, &d->pirq_mask) )
+        {
+            ASSERT(action->ack_type != ACKTYPE_NONE);
+            if ( --action->in_flight == 0 )
+            {
+                if ( (action->ack_type == ACKTYPE_SINGLE) ||
+                     test_and_clear_bit(cpu, &action->cpu_ack_map) )
+                    desc->handler->end(irq_to_vector(pirq));
+                cpu_ack_map = action->cpu_ack_map;
+            }
+        }
         spin_unlock_irq(&desc->lock);
+
+        if ( !cpus_empty(cpu_ack_map) )
+        {
+            on_selected_cpus(cpu_ack_map, end_guest_irq, desc, 1, 0);
+            cpu_ack_map = CPU_MASK_NONE;
+        }
     }
 
     return 0;
+}
+
+int pirq_acktype(int irq)
+{
+    irq_desc_t  *desc;
+    unsigned int vector;
+
+    vector = irq_to_vector(irq);
+    if ( vector == 0 )
+        return ACKTYPE_NONE;
+
+    desc = &irq_desc[vector];
+
+    /*
+     * Edge-triggered IO-APIC interrupts need no final acknowledgement:
+     * we ACK early during interrupt processing.
+     */
+    if ( !strcmp(desc->handler->typename, "IO-APIC-edge") )
+        return ACKTYPE_NONE;
+
+    /* Legacy PIC interrupts can be acknowledged from any CPU. */
+    if ( !strcmp(desc->handler->typename, "XT-PIC") )
+        return ACKTYPE_SINGLE;
+
+    /*
+     * By default assume that an interrupt must be finally acknowledged on
+     * the CPU on which it was received. This is true for level-triggered
+     * IO-APIC interrupts, for example, where we tickle the LAPIC to EOI.
+     */
+    return ACKTYPE_MULTI;
 }
 
 int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
@@ -230,10 +309,12 @@
             goto out;
         }
 
-        action->nr_guests = 0;
-        action->in_flight = 0;
-        action->shareable = will_share;
-        
+        action->nr_guests   = 0;
+        action->in_flight   = 0;
+        action->shareable   = will_share;
+        action->ack_type    = pirq_acktype(irq);
+        action->cpu_ack_map = CPU_MASK_NONE;
+
         desc->depth = 0;
         desc->status |= IRQ_GUEST;
         desc->status &= ~IRQ_DISABLED;
@@ -271,6 +352,7 @@
     unsigned int        vector = irq_to_vector(irq);
     irq_desc_t         *desc = &irq_desc[vector];
     irq_guest_action_t *action;
+    cpumask_t           cpu_ack_map;
     unsigned long       flags;
     int                 i;
 
@@ -280,28 +362,60 @@
 
     action = (irq_guest_action_t *)desc->action;
 
-    if ( test_and_clear_bit(irq, &d->pirq_mask) &&
-         (--action->in_flight == 0) )
-        desc->handler->end(vector);
-
-    if ( action->nr_guests == 1 )
-    {
-        desc->action = NULL;
-        xfree(action);
-        desc->depth   = 1;
-        desc->status |= IRQ_DISABLED;
-        desc->status &= ~IRQ_GUEST;
-        desc->handler->shutdown(vector);
-    }
-    else
-    {
-        i = 0;
-        while ( action->guest[i] && (action->guest[i] != d) )
-            i++;
-        memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
-        action->nr_guests--;
-    }
-
+    i = 0;
+    while ( action->guest[i] && (action->guest[i] != d) )
+        i++;
+    memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
+    action->nr_guests--;
+
+    switch ( action->ack_type )
+    {
+    case ACKTYPE_SINGLE:
+        if ( test_and_clear_bit(irq, &d->pirq_mask) &&
+             (--action->in_flight == 0) )
+            desc->handler->end(vector);
+        break;
+    case ACKTYPE_MULTI:
+        if ( test_and_clear_bit(irq, &d->pirq_mask) )
+            --action->in_flight;
+        while ( action->in_flight == 0 )
+        {
+            /* We cannot release guest info until all pending ACKs are done. */
+            cpu_ack_map = action->cpu_ack_map;
+            if ( cpus_empty(cpu_ack_map) )
+                break;
+
+            /* We cannot hold the lock while interrupting other CPUs. */
+            spin_unlock_irqrestore(&desc->lock, flags);    
+            on_selected_cpus(cpu_ack_map, end_guest_irq, desc, 1, 1);
+            spin_lock_irqsave(&desc->lock, flags);
+
+            /* The world can change while we do not hold the lock. */
+            if ( !(desc->status & IRQ_GUEST) )
+                goto out;
+            if ( (action->ack_type != ACKTYPE_MULTI) ||
+                 (action->nr_guests != 0) )
+                break;
+        }
+        break;
+    }
+
+    BUG_ON(test_bit(irq, &d->pirq_mask));
+
+    if ( action->nr_guests != 0 )
+        goto out;
+
+    BUG_ON(action->in_flight != 0);
+    BUG_ON(!cpus_empty(action->cpu_ack_map));
+
+    desc->action = NULL;
+    xfree(action);
+    desc->depth   = 1;
+    desc->status |= IRQ_DISABLED;
+    desc->status &= ~IRQ_GUEST;
+    desc->handler->shutdown(vector);
+
+ out:
     spin_unlock_irqrestore(&desc->lock, flags);    
     return 0;
 }
diff -r 2ccaa3879417 -r bb0dc0ae23bb xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Fri Apr 14 10:58:49 2006
+++ b/xen/arch/x86/physdev.c    Fri Apr 14 11:01:15 2006
@@ -18,6 +18,9 @@
 extern int
 ioapic_guest_write(
     unsigned long physbase, unsigned int reg, u32 pval);
+extern int
+pirq_acktype(
+    int irq);
 
 /*
  * Demuxing hypercall.
@@ -43,8 +46,7 @@
         if ( (irq < 0) || (irq >= NR_IRQS) )
             break;
         op.u.irq_status_query.flags = 0;
-        /* Edge-triggered interrupts don't need an explicit unmask downcall. */
-        if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") )
+        if ( pirq_acktype(irq) != 0 )
             op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY;
         ret = 0;
         break;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.