[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] Fix Xen's interrupt acknowledgement routines on certain



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 50f2558d6241c39fd29641a4a78b0a1169b5e038
# Parent  83e4c180f09439baaa89fee900d77b47969565d7
Fix Xen's interrupt acknowledgement routines on certain
(apparently broken) IO-APIC hardware:
 1. Do not mask/unmask the IO-APIC pin during normal ISR
    processing. This seems to have really bizarre side effects
    on some chipsets.
 2. Since we instead tickle the local APIC in the ->end
    irq hook function, it *must* run on the CPU that
    received the interrupt. Therefore we track which CPUs
    need to do final acknowledgement and IPI them if
    necessary to do so.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
xen-unstable changeset: 9644:bb0dc0ae23bb1fe49c197f38951fc424eef2905e
xen-unstable date: Fri Apr 14 12:01:15 2006 +0100

New IO-APIC ACK method seems to cause problems on some systems
(e.g., Dell 1850). Disable it by default for now, but allow the
new method to be tested by passing boot parameter 'new_ack'
to Xen.

You can tell which ACK method you are using because Xen prints
out "Using old ACK method" or "Using new ACK method" during boot.

This workaround can be removed if/when the problems with the new
ACK method are flushed out.

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
xen-unstable changeset: 9645:9a273aabb839dff897be39a581d716dfed952c79
xen-unstable date: Fri Apr 14 14:14:24 2006 +0100

Big fixes for the new IO-APIC acknowledging method. The problems
were:
 1. Some critical Xen interrupts could get blocked behind
    unacknowledged guest interrupts. This is avoided by making
    all Xen-bound interrupts strictly higher priority.
 2. Interrupts must not only be EOIed on the CPU that received
    them, but also in reverse order when interrupts are nested.
    A whole load of logic has been added to ensure this.

There are two boot parameters relating to all this:
 'ioapic_ack=old' -- use the old IO-APIC ACK method
 'ioapic_ack=new' -- use the new IO-APIC ACK method (default)
 'force_intack'   -- periodically force acknowledgement of
                     interrupts (default is no; useful for debugging)

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
xen-unstable changeset: 9664:b39365343de08af6c76fa3492b2cffb436470b3f
xen-unstable date: Sat Apr 15 18:09:52 2006 +0100

Clean up new EOI ack method some more and fix unbinding
IRQ from guest (pending EOIs must be forcibly flushed).

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
xen-unstable changeset: 9667:3c1cd09801c047008e529aa03b56059e00c1f4f2
xen-unstable date: Sun Apr 16 15:04:21 2006 +0100

This patch defines a test_and_clear bitop for cpumask_t pointers.
Also fixes "wrong pointer type" for type specific bitops by using
&foo[0] instead of &foo.

Signed-off-by: Jimi Xenidis <jimix@xxxxxxxxxx>
xen-unstable changeset: 9670:b550a93c6459c4f0f2a86b61a08768d040482d88
xen-unstable date: Wed Apr 19 18:24:49 2006 +0100

Replace &foo[0] with foo where the latter seems cleaner
(which is usually, and particularly when its an argument
to one of the bitops functions).

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
xen-unstable changeset: 9671:72f9c751d3ea1f17ff513cd7fc2cbe671a9af7c9
xen-unstable date: Wed Apr 19 18:32:20 2006 +0100

diff -r 83e4c180f094 -r 50f2558d6241 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Fri Apr 28 14:04:56 2006 +0100
+++ b/xen/arch/x86/i8259.c      Fri Apr 28 14:06:33 2006 +0100
@@ -318,7 +318,7 @@ void __init init_8259A(int auto_eoi)
      * outb_p - this has to work on a wide range of PC hardware.
      */
     outb_p(0x11, 0x20);     /* ICW1: select 8259A-1 init */
-    outb_p(0x20 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
+    outb_p(FIRST_LEGACY_VECTOR + 0, 0x21); /* ICW2: 8259A-1 IR0-7 */
     outb_p(0x04, 0x21);     /* 8259A-1 (the master) has a slave on IR2 */
     if (auto_eoi)
         outb_p(0x03, 0x21); /* master does Auto EOI */
@@ -326,7 +326,7 @@ void __init init_8259A(int auto_eoi)
         outb_p(0x01, 0x21); /* master expects normal EOI */
 
     outb_p(0x11, 0xA0);     /* ICW1: select 8259A-2 init */
-    outb_p(0x20 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
+    outb_p(FIRST_LEGACY_VECTOR + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 */
     outb_p(0x02, 0xA1);     /* 8259A-2 is a slave on master's IR2 */
     outb_p(0x01, 0xA1);     /* (slave's support for AEOI in flat mode
                                is to be investigated) */
diff -r 83e4c180f094 -r 50f2558d6241 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Fri Apr 28 14:04:56 2006 +0100
+++ b/xen/arch/x86/io_apic.c    Fri Apr 28 14:06:33 2006 +0100
@@ -665,11 +665,11 @@ static inline int IO_APIC_irq_trigger(in
 }
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
+u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
 
 int assign_irq_vector(int irq)
 {
-    static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+    static int current_vector = FIRST_DYNAMIC_VECTOR, offset = 0;
 
     BUG_ON(irq >= NR_IRQ_VECTORS);
     if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
@@ -685,11 +685,11 @@ next:
     if (current_vector == 0x80)
         goto next;
 
-    if (current_vector >= FIRST_SYSTEM_VECTOR) {
+    if (current_vector > LAST_DYNAMIC_VECTOR) {
         offset++;
         if (!(offset%8))
             return -ENOSPC;
-        current_vector = FIRST_DEVICE_VECTOR + offset;
+        current_vector = FIRST_DYNAMIC_VECTOR + offset;
     }
 
     vector_irq[current_vector] = irq;
@@ -1329,10 +1329,25 @@ static unsigned int startup_level_ioapic
     return 0; /* don't check for pending */
 }
 
+int ioapic_ack_new = 1;
+static void setup_ioapic_ack(char *s)
+{
+    if ( !strcmp(s, "old") )
+        ioapic_ack_new = 0;
+    else if ( !strcmp(s, "new") )
+        ioapic_ack_new = 1;
+    else
+        printk("Unknown ioapic_ack value specified: '%s'\n", s);
+}
+custom_param("ioapic_ack", setup_ioapic_ack);
+
 static void mask_and_ack_level_ioapic_irq (unsigned int irq)
 {
     unsigned long v;
     int i;
+
+    if ( ioapic_ack_new )
+        return;
 
     mask_IO_APIC_irq(irq);
 /*
@@ -1371,7 +1386,51 @@ static void mask_and_ack_level_ioapic_ir
 
 static void end_level_ioapic_irq (unsigned int irq)
 {
-    unmask_IO_APIC_irq(irq);
+    unsigned long v;
+    int i;
+
+    if ( !ioapic_ack_new )
+    {
+        if ( !(irq_desc[IO_APIC_VECTOR(irq)].status & IRQ_DISABLED) )
+            unmask_IO_APIC_irq(irq);
+        return;
+    }
+
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets).  Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source.  The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually.  We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt.  We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul.  --macro
+ */
+    i = IO_APIC_VECTOR(irq);
+
+    v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+    ack_APIC_irq();
+
+    if (!(v & (1 << (i & 0x1f)))) {
+        atomic_inc(&irq_mis_count);
+        spin_lock(&ioapic_lock);
+        __mask_IO_APIC_irq(irq);
+        __edge_IO_APIC_irq(irq);
+        __level_IO_APIC_irq(irq);
+        if ( !(irq_desc[IO_APIC_VECTOR(irq)].status & IRQ_DISABLED) )
+            __unmask_IO_APIC_irq(irq);
+        spin_unlock(&ioapic_lock);
+    }
 }
 
 static unsigned int startup_edge_ioapic_vector(unsigned int vector)
@@ -1703,6 +1762,7 @@ void __init setup_IO_APIC(void)
         io_apic_irqs = ~PIC_IRQS;
 
     printk("ENABLING IO-APIC IRQs\n");
+    printk(" -> Using %s ACK method\n", ioapic_ack_new ? "new" : "old");
 
     /*
      * Set up IO-APIC IRQ routing.
@@ -1962,9 +2022,9 @@ int ioapic_guest_write(unsigned long phy
         return 0;
     }
 
-    if ( old_rte.vector >= FIRST_DEVICE_VECTOR )
+    if ( old_rte.vector >= FIRST_DYNAMIC_VECTOR )
         old_irq = vector_irq[old_rte.vector];
-    if ( new_rte.vector >= FIRST_DEVICE_VECTOR )
+    if ( new_rte.vector >= FIRST_DYNAMIC_VECTOR )
         new_irq = vector_irq[new_rte.vector];
 
     if ( (old_irq != new_irq) && (old_irq != -1) && IO_APIC_IRQ(old_irq) )
diff -r 83e4c180f094 -r 50f2558d6241 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Fri Apr 28 14:04:56 2006 +0100
+++ b/xen/arch/x86/irq.c        Fri Apr 28 14:06:33 2006 +0100
@@ -148,8 +148,23 @@ typedef struct {
     u8 nr_guests;
     u8 in_flight;
     u8 shareable;
+    u8 ack_type;
+#define ACKTYPE_NONE   0     /* No final acknowledgement is required */
+#define ACKTYPE_UNMASK 1     /* Unmask PIC hardware (from any CPU)   */
+#define ACKTYPE_EOI    2     /* EOI on the CPU that was interrupted  */
+    cpumask_t cpu_eoi_map;   /* CPUs that need to EOI this interrupt */
     struct domain *guest[IRQ_MAX_GUESTS];
 } irq_guest_action_t;
+
+/*
+ * Stack of interrupts awaiting EOI on each CPU. These must be popped in
+ * order, as only the current highest-priority pending irq can be EOIed.
+ */
+static struct {
+    u8 vector; /* Vector awaiting EOI */
+    u8 ready;  /* Ready for EOI now?  */
+} pending_eoi[NR_CPUS][NR_VECTORS] __cacheline_aligned;
+#define pending_eoi_sp(cpu) (pending_eoi[cpu][NR_VECTORS-1].vector)
 
 static void __do_IRQ_guest(int vector)
 {
@@ -157,36 +172,209 @@ static void __do_IRQ_guest(int vector)
     irq_desc_t         *desc = &irq_desc[vector];
     irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
     struct domain      *d;
-    int                 i;
+    int                 i, sp, cpu = smp_processor_id();
+
+    if ( unlikely(action->nr_guests == 0) )
+    {
+        /* An interrupt may slip through while freeing an ACKTYPE_EOI irq. */
+        ASSERT(action->ack_type == ACKTYPE_EOI);
+        ASSERT(desc->status & IRQ_DISABLED);
+        desc->handler->end(vector);
+        return;
+    }
+
+    if ( action->ack_type == ACKTYPE_EOI )
+    {
+        sp = pending_eoi_sp(cpu);
+        ASSERT((sp == 0) || (pending_eoi[cpu][sp-1].vector < vector));
+        ASSERT(sp < (NR_VECTORS-1));
+        pending_eoi[cpu][sp].vector = vector;
+        pending_eoi[cpu][sp].ready = 0;
+        pending_eoi_sp(cpu) = sp+1;
+        cpu_set(cpu, action->cpu_eoi_map);
+    }
 
     for ( i = 0; i < action->nr_guests; i++ )
     {
         d = action->guest[i];
-        if ( !test_and_set_bit(irq, &d->pirq_mask) )
+        if ( (action->ack_type != ACKTYPE_NONE) &&
+             !test_and_set_bit(irq, d->pirq_mask) )
             action->in_flight++;
         send_guest_pirq(d, irq);
     }
 }
 
+/* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
+static void flush_ready_eoi(void *unused)
+{
+    irq_desc_t *desc;
+    int         vector, sp, cpu = smp_processor_id();
+
+    ASSERT(!local_irq_is_enabled());
+
+    sp = pending_eoi_sp(cpu);
+
+    while ( (--sp >= 0) && pending_eoi[cpu][sp].ready )
+    {
+        vector = pending_eoi[cpu][sp].vector;
+        desc = &irq_desc[vector];
+        spin_lock(&desc->lock);
+        desc->handler->end(vector);
+        spin_unlock(&desc->lock);
+    }
+
+    pending_eoi_sp(cpu) = sp+1;
+}
+
+static void __set_eoi_ready(irq_desc_t *desc)
+{
+    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+    int                 vector, sp, cpu = smp_processor_id();
+
+    vector = desc - irq_desc;
+
+    if ( !(desc->status & IRQ_GUEST) ||
+         (action->in_flight != 0) ||
+         !cpu_test_and_clear(cpu, action->cpu_eoi_map) )
+        return;
+
+    sp = pending_eoi_sp(cpu);
+    do {
+        ASSERT(sp > 0);
+    } while ( pending_eoi[cpu][--sp].vector != vector );
+    ASSERT(!pending_eoi[cpu][sp].ready);
+    pending_eoi[cpu][sp].ready = 1;
+}
+
+/* Mark specified IRQ as ready-for-EOI (if it really is) and attempt to EOI. */
+static void set_eoi_ready(void *data)
+{
+    irq_desc_t *desc = data;
+
+    ASSERT(!local_irq_is_enabled());
+
+    spin_lock(&desc->lock);
+    __set_eoi_ready(desc);
+    spin_unlock(&desc->lock);
+
+    flush_ready_eoi(NULL);
+}
+
+/*
+ * Forcibly flush all pending EOIs on this CPU by emulating end-of-ISR
+ * notifications from guests. The caller of this function must ensure that
+ * all CPUs execute flush_ready_eoi().
+ */
+static void flush_all_pending_eoi(void *unused)
+{
+    irq_desc_t         *desc;
+    irq_guest_action_t *action;
+    int                 i, vector, sp, cpu = smp_processor_id();
+
+    ASSERT(!local_irq_is_enabled());
+
+    sp = pending_eoi_sp(cpu);
+    while ( --sp >= 0 )
+    {
+        if ( pending_eoi[cpu][sp].ready )
+            continue;
+        vector = pending_eoi[cpu][sp].vector;
+        desc = &irq_desc[vector];
+        spin_lock(&desc->lock);
+        action = (irq_guest_action_t *)desc->action;
+        ASSERT(action->ack_type == ACKTYPE_EOI);
+        ASSERT(desc->status & IRQ_GUEST);
+        for ( i = 0; i < action->nr_guests; i++ )
+            clear_bit(vector_to_irq(vector), action->guest[i]->pirq_mask);
+        action->in_flight = 0;
+        spin_unlock(&desc->lock);
+    }
+
+    flush_ready_eoi(NULL);
+}
+
 int pirq_guest_unmask(struct domain *d)
 {
-    irq_desc_t    *desc;
-    unsigned int   pirq;
-    shared_info_t *s = d->shared_info;
+    irq_desc_t         *desc;
+    irq_guest_action_t *action;
+    cpumask_t           cpu_eoi_map = CPU_MASK_NONE;
+    unsigned int        pirq, cpu = smp_processor_id();
+    shared_info_t      *s = d->shared_info;
 
     for ( pirq = find_first_bit(d->pirq_mask, NR_PIRQS);
           pirq < NR_PIRQS;
           pirq = find_next_bit(d->pirq_mask, NR_PIRQS, pirq+1) )
     {
-        desc = &irq_desc[irq_to_vector(pirq)];
+        desc   = &irq_desc[irq_to_vector(pirq)];
+        action = (irq_guest_action_t *)desc->action;
+
         spin_lock_irq(&desc->lock);
-        if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) &&
-             test_and_clear_bit(pirq, &d->pirq_mask) &&
-             (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
-            desc->handler->end(irq_to_vector(pirq));
-        spin_unlock_irq(&desc->lock);
-    }
-
+
+        if ( !test_bit(d->pirq_to_evtchn[pirq], s->evtchn_mask) &&
+             test_and_clear_bit(pirq, d->pirq_mask) )
+        {
+            ASSERT(action->ack_type != ACKTYPE_NONE);
+            if ( --action->in_flight == 0 )
+            {
+                if ( action->ack_type == ACKTYPE_UNMASK )
+                    desc->handler->end(irq_to_vector(pirq));
+                cpu_eoi_map = action->cpu_eoi_map;
+            }
+        }
+
+        if ( cpu_test_and_clear(cpu, cpu_eoi_map) )
+        {
+            __set_eoi_ready(desc);
+            spin_unlock(&desc->lock);
+            flush_ready_eoi(NULL);
+            local_irq_enable();
+        }
+        else
+        {
+            spin_unlock_irq(&desc->lock);
+        }
+
+        if ( !cpus_empty(cpu_eoi_map) )
+        {
+            on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 0);
+            cpu_eoi_map = CPU_MASK_NONE;
+        }
+    }
+
+    return 0;
+}
+
+extern int ioapic_ack_new;
+int pirq_acktype(int irq)
+{
+    irq_desc_t  *desc;
+    unsigned int vector;
+
+    vector = irq_to_vector(irq);
+    if ( vector == 0 )
+        return ACKTYPE_NONE;
+
+    desc = &irq_desc[vector];
+
+    /*
+     * Edge-triggered IO-APIC interrupts need no final acknowledgement:
+     * we ACK early during interrupt processing.
+     */
+    if ( !strcmp(desc->handler->typename, "IO-APIC-edge") )
+        return ACKTYPE_NONE;
+
+    /* Legacy PIC interrupts can be acknowledged from any CPU. */
+    if ( !strcmp(desc->handler->typename, "XT-PIC") )
+        return ACKTYPE_UNMASK;
+
+    /*
+     * Level-triggered IO-APIC interrupts need to be acknowledged on the CPU
+     * on which they were received. This is because we tickle the LAPIC to EOI.
+     */
+    if ( !strcmp(desc->handler->typename, "IO-APIC-level") )
+        return ioapic_ack_new ? ACKTYPE_EOI : ACKTYPE_UNMASK;
+
+    BUG();
     return 0;
 }
 
@@ -202,6 +390,7 @@ int pirq_guest_bind(struct vcpu *v, int 
     if ( (irq < 0) || (irq >= NR_IRQS) )
         return -EINVAL;
 
+ retry:
     vector = irq_to_vector(irq);
     if ( vector == 0 )
         return -EINVAL;
@@ -230,10 +419,12 @@ int pirq_guest_bind(struct vcpu *v, int 
             goto out;
         }
 
-        action->nr_guests = 0;
-        action->in_flight = 0;
-        action->shareable = will_share;
-        
+        action->nr_guests   = 0;
+        action->in_flight   = 0;
+        action->shareable   = will_share;
+        action->ack_type    = pirq_acktype(irq);
+        action->cpu_eoi_map = CPU_MASK_NONE;
+
         desc->depth = 0;
         desc->status |= IRQ_GUEST;
         desc->status &= ~IRQ_DISABLED;
@@ -251,6 +442,18 @@ int pirq_guest_bind(struct vcpu *v, int 
         rc = -EBUSY;
         goto out;
     }
+    else if ( action->nr_guests == 0 )
+    {
+        /*
+         * Indicates that an ACKTYPE_EOI interrupt is being released.
+         * Wait for that to happen before continuing.
+         */
+        ASSERT(action->ack_type == ACKTYPE_EOI);
+        ASSERT(desc->status & IRQ_DISABLED);
+        spin_unlock_irqrestore(&desc->lock, flags);
+        cpu_relax();
+        goto retry;
+    }
 
     if ( action->nr_guests == IRQ_MAX_GUESTS )
     {
@@ -271,6 +474,7 @@ int pirq_guest_unbind(struct domain *d, 
     unsigned int        vector = irq_to_vector(irq);
     irq_desc_t         *desc = &irq_desc[vector];
     irq_guest_action_t *action;
+    cpumask_t           cpu_eoi_map;
     unsigned long       flags;
     int                 i;
 
@@ -280,28 +484,68 @@ int pirq_guest_unbind(struct domain *d, 
 
     action = (irq_guest_action_t *)desc->action;
 
-    if ( test_and_clear_bit(irq, &d->pirq_mask) &&
-         (--action->in_flight == 0) )
-        desc->handler->end(vector);
-
-    if ( action->nr_guests == 1 )
-    {
-        desc->action = NULL;
-        xfree(action);
-        desc->depth   = 1;
-        desc->status |= IRQ_DISABLED;
-        desc->status &= ~IRQ_GUEST;
-        desc->handler->shutdown(vector);
-    }
-    else
-    {
-        i = 0;
-        while ( action->guest[i] && (action->guest[i] != d) )
-            i++;
-        memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
-        action->nr_guests--;
-    }
-
+    i = 0;
+    while ( action->guest[i] && (action->guest[i] != d) )
+        i++;
+    memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1);
+    action->nr_guests--;
+
+    switch ( action->ack_type )
+    {
+    case ACKTYPE_UNMASK:
+        if ( test_and_clear_bit(irq, d->pirq_mask) &&
+             (--action->in_flight == 0) )
+            desc->handler->end(vector);
+        break;
+    case ACKTYPE_EOI:
+        /* NB. If #guests == 0 then we clear the eoi_map later on. */
+        if ( test_and_clear_bit(irq, d->pirq_mask) &&
+             (--action->in_flight == 0) &&
+             (action->nr_guests != 0) )
+        {
+            cpu_eoi_map = action->cpu_eoi_map;
+            spin_unlock_irqrestore(&desc->lock, flags);    
+            on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 0);
+            spin_lock_irqsave(&desc->lock, flags);
+        }
+        break;
+    }
+
+    BUG_ON(test_bit(irq, d->pirq_mask));
+
+    if ( action->nr_guests != 0 )
+        goto out;
+
+    BUG_ON(action->in_flight != 0);
+
+    /* Disabling IRQ before releasing the desc_lock avoids an IRQ storm. */
+    desc->depth   = 1;
+    desc->status |= IRQ_DISABLED;
+    desc->handler->disable(vector);
+
+    /*
+     * We may have a EOI languishing anywhere in one of the per-CPU
+     * EOI stacks. Forcibly flush the stack on every CPU where this might
+     * be the case.
+     */
+    cpu_eoi_map = action->cpu_eoi_map;
+    if ( !cpus_empty(cpu_eoi_map) )
+    {
+        BUG_ON(action->ack_type != ACKTYPE_EOI);
+        spin_unlock_irqrestore(&desc->lock, flags);
+        on_selected_cpus(cpu_eoi_map, flush_all_pending_eoi, NULL, 1, 1);
+        on_selected_cpus(cpu_online_map, flush_ready_eoi, NULL, 1, 1);
+        spin_lock_irqsave(&desc->lock, flags);
+    }
+
+    BUG_ON(!cpus_empty(action->cpu_eoi_map));
+
+    desc->action = NULL;
+    xfree(action);
+    desc->status &= ~IRQ_GUEST;
+    desc->handler->shutdown(vector);
+
+ out:
     spin_unlock_irqrestore(&desc->lock, flags);    
     return 0;
 }
@@ -373,3 +617,41 @@ static int __init setup_dump_irqs(void)
     return 0;
 }
 __initcall(setup_dump_irqs);
+
+static struct timer end_irq_timer[NR_CPUS];
+
+/*
+ * force_intack: Forcibly emit all pending EOIs on each CPU every second.
+ * Mainly useful for debugging or poking lazy guests ISRs.
+ */
+
+static void end_irq_timeout(void *unused)
+{
+    int cpu = smp_processor_id();
+
+    local_irq_disable();
+    flush_all_pending_eoi(NULL);
+    local_irq_enable();
+
+    on_selected_cpus(cpu_online_map, flush_ready_eoi, NULL, 1, 0);
+
+    set_timer(&end_irq_timer[cpu], NOW() + MILLISECS(1000));
+}
+
+static void __init __setup_irq_timeout(void *unused)
+{
+    int cpu = smp_processor_id();
+    init_timer(&end_irq_timer[cpu], end_irq_timeout, NULL, cpu);
+    set_timer(&end_irq_timer[cpu], NOW() + MILLISECS(1000));
+}
+
+static int force_intack;
+boolean_param("force_intack", force_intack);
+
+static int __init setup_irq_timeout(void)
+{
+    if ( force_intack )
+        on_each_cpu(__setup_irq_timeout, NULL, 1, 1);
+    return 0;
+}
+__initcall(setup_irq_timeout);
diff -r 83e4c180f094 -r 50f2558d6241 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Fri Apr 28 14:04:56 2006 +0100
+++ b/xen/arch/x86/physdev.c    Fri Apr 28 14:06:33 2006 +0100
@@ -18,6 +18,9 @@ extern int
 extern int
 ioapic_guest_write(
     unsigned long physbase, unsigned int reg, u32 pval);
+extern int
+pirq_acktype(
+    int irq);
 
 /*
  * Demuxing hypercall.
@@ -43,8 +46,7 @@ long do_physdev_op(GUEST_HANDLE(physdev_
         if ( (irq < 0) || (irq >= NR_IRQS) )
             break;
         op.u.irq_status_query.flags = 0;
-        /* Edge-triggered interrupts don't need an explicit unmask downcall. */
-        if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") )
+        if ( pirq_acktype(irq) != 0 )
             op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY;
         ret = 0;
         break;
diff -r 83e4c180f094 -r 50f2558d6241 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Fri Apr 28 14:04:56 2006 +0100
+++ b/xen/arch/x86/smpboot.c    Fri Apr 28 14:06:33 2006 +0100
@@ -41,6 +41,7 @@
 #include <xen/irq.h>
 #include <xen/delay.h>
 #include <xen/softirq.h>
+#include <xen/serial.h>
 #include <asm/current.h>
 #include <asm/mc146818rtc.h>
 #include <asm/desc.h>
@@ -1231,12 +1232,25 @@ void __init smp_cpus_done(unsigned int m
 
 void __init smp_intr_init(void)
 {
+       int irq, seridx;
+
        /*
         * IRQ0 must be given a fixed assignment and initialized,
         * because it's used before the IO-APIC is set up.
         */
-       irq_vector[0] = FIRST_DEVICE_VECTOR;
-       vector_irq[FIRST_DEVICE_VECTOR] = 0;
+       irq_vector[0] = FIRST_HIPRIORITY_VECTOR;
+       vector_irq[FIRST_HIPRIORITY_VECTOR] = 0;
+
+       /*
+        * Also ensure serial interrupts are high priority. We do not
+        * want them to be blocked by unacknowledged guest-bound interrupts.
+        */
+       for (seridx = 0; seridx < 2; seridx++) {
+               if ((irq = serial_irq(seridx)) < 0)
+                       continue;
+               irq_vector[irq] = FIRST_HIPRIORITY_VECTOR + seridx + 1;
+               vector_irq[FIRST_HIPRIORITY_VECTOR + seridx + 1] = irq;
+       }
 
        /* IPI for event checking. */
        set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt);
diff -r 83e4c180f094 -r 50f2558d6241 xen/drivers/char/ns16550.c
--- a/xen/drivers/char/ns16550.c        Fri Apr 28 14:04:56 2006 +0100
+++ b/xen/drivers/char/ns16550.c        Fri Apr 28 14:06:33 2006 +0100
@@ -260,13 +260,20 @@ static void ns16550_endboot(struct seria
 #define ns16550_endboot NULL
 #endif
 
+static int ns16550_irq(struct serial_port *port)
+{
+    struct ns16550 *uart = port->uart;
+    return ((uart->irq > 0) ? uart->irq : -1);
+}
+
 static struct uart_driver ns16550_driver = {
     .init_preirq  = ns16550_init_preirq,
     .init_postirq = ns16550_init_postirq,
     .endboot      = ns16550_endboot,
     .tx_empty     = ns16550_tx_empty,
     .putc         = ns16550_putc,
-    .getc         = ns16550_getc
+    .getc         = ns16550_getc,
+    .irq          = ns16550_irq
 };
 
 static int parse_parity_char(int c)
diff -r 83e4c180f094 -r 50f2558d6241 xen/drivers/char/serial.c
--- a/xen/drivers/char/serial.c Fri Apr 28 14:04:56 2006 +0100
+++ b/xen/drivers/char/serial.c Fri Apr 28 14:06:33 2006 +0100
@@ -372,6 +372,15 @@ void serial_endboot(void)
             com[i].driver->endboot(&com[i]);
 }
 
+int serial_irq(int idx)
+{
+    if ( (idx >= 0) && (idx < ARRAY_SIZE(com)) &&
+         com[idx].driver && com[idx].driver->irq )
+        return com[idx].driver->irq(&com[idx]);
+
+    return -1;
+}
+
 void serial_register_uart(int idx, struct uart_driver *driver, void *uart)
 {
     /* Store UART-specific info. */
diff -r 83e4c180f094 -r 50f2558d6241 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Fri Apr 28 14:04:56 2006 +0100
+++ b/xen/include/asm-x86/irq.h Fri Apr 28 14:06:33 2006 +0100
@@ -11,8 +11,8 @@
 #define IO_APIC_IRQ(irq)    (((irq) >= 16) || ((1<<(irq)) & io_apic_irqs))
 #define IO_APIC_VECTOR(irq) (irq_vector[irq])
 
-#define LEGACY_VECTOR(irq)          ((irq) + FIRST_EXTERNAL_VECTOR)
-#define LEGACY_IRQ_FROM_VECTOR(vec) ((vec) - FIRST_EXTERNAL_VECTOR)
+#define LEGACY_VECTOR(irq)          ((irq) + FIRST_LEGACY_VECTOR)
+#define LEGACY_IRQ_FROM_VECTOR(vec) ((vec) - FIRST_LEGACY_VECTOR)
 
 #define irq_to_vector(irq)  \
     (IO_APIC_IRQ(irq) ? IO_APIC_VECTOR(irq) : LEGACY_VECTOR(irq))
diff -r 83e4c180f094 -r 50f2558d6241 
xen/include/asm-x86/mach-default/irq_vectors.h
--- a/xen/include/asm-x86/mach-default/irq_vectors.h    Fri Apr 28 14:04:56 
2006 +0100
+++ b/xen/include/asm-x86/mach-default/irq_vectors.h    Fri Apr 28 14:06:33 
2006 +0100
@@ -1,96 +1,36 @@
-/*
- * This file should contain #defines for all of the interrupt vector
- * numbers used by this architecture.
- *
- * In addition, there are some standard defines:
- *
- *     FIRST_EXTERNAL_VECTOR:
- *             The first free place for external interrupts
- *
- *     SYSCALL_VECTOR:
- *             The IRQ vector a syscall makes the user to kernel transition
- *             under.
- *
- *     TIMER_IRQ:
- *             The IRQ number the timer interrupt comes in at.
- *
- *     NR_IRQS:
- *             The total number of interrupt vectors (including all the
- *             architecture specific interrupts) needed.
- *
- */                    
 #ifndef _ASM_IRQ_VECTORS_H
 #define _ASM_IRQ_VECTORS_H
 
-/*
- * IDT vectors usable for external interrupt sources start
- * at 0x20:
- */
-#define FIRST_EXTERNAL_VECTOR  0x20
-
-#define HYPERCALL_VECTOR       0x82
-
-/*
- * Vectors 0x20-0x2f are used for ISA interrupts.
- */
-
-/*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- *
- *  some of the following vectors are 'rare', they are merged
- *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
- *  TLB, reschedule and local APIC vectors are performance-critical.
- *
- *  Vectors 0xf0-0xfa are free (reserved for future Linux use).
- */
+/* Processor-initiated interrupts are all high priority. */
 #define SPURIOUS_APIC_VECTOR   0xff
 #define ERROR_APIC_VECTOR      0xfe
 #define INVALIDATE_TLB_VECTOR  0xfd
 #define EVENT_CHECK_VECTOR     0xfc
 #define CALL_FUNCTION_VECTOR   0xfb
-
-#define THERMAL_APIC_VECTOR    0xf0
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR     0xef
+#define THERMAL_APIC_VECTOR    0xfa
+#define LOCAL_TIMER_VECTOR     0xf9
 
 /*
- * First APIC vector available to drivers: (vectors 0x30-0xee)
- * we start at 0x31 to spread out vectors evenly between priority
- * levels. (0x80 is the syscall vector)
+ * High-priority dynamically-allocated vectors. For interrupts that
+ * must be higher priority than any guest-bound interrupt.
  */
-#define FIRST_DEVICE_VECTOR    0x31
-#define FIRST_SYSTEM_VECTOR    0xef
+#define FIRST_HIPRIORITY_VECTOR        0xf0
+#define LAST_HIPRIORITY_VECTOR  0xf8
 
-#define TIMER_IRQ 0
+/* Legacy PIC uses vectors 0xe0-0xef. */
+#define FIRST_LEGACY_VECTOR    0xe0
+#define LAST_LEGACY_VECTOR      0xef
 
-/*
- * 16 8259A IRQ's, 208 potential APIC interrupt sources.
- * Right now the APIC is mostly only used for SMP.
- * 256 vectors is an architectural limit. (we can have
- * more than 256 devices theoretically, but they will
- * have to use shared interrupts)
- * Since vectors 0x00-0x1f are used/reserved for the CPU,
- * the usable vector space is 0x20-0xff (224 vectors)
- */
+#define HYPERCALL_VECTOR       0x82
 
-/*
- * The maximum number of vectors supported by i386 processors
- * is limited to 256. For processors other than i386, NR_VECTORS
- * should be changed accordingly.
- */
+/* Dynamically-allocated vectors available to any driver. */
+#define FIRST_DYNAMIC_VECTOR   0x20
+#define LAST_DYNAMIC_VECTOR    0xdf
+
 #define NR_VECTORS 256
 
-#include "irq_vectors_limits.h"
-
-#define FPU_IRQ                        13
-
-#define        FIRST_VM86_IRQ          3
-#define LAST_VM86_IRQ          15
-#define invalid_vm86_irq(irq)  ((irq) < 3 || (irq) > 15)
-
+/* Limited by number of trap vectors. */
+#define NR_IRQS        NR_VECTORS
+#define NR_IRQ_VECTORS NR_IRQS
 
 #endif /* _ASM_IRQ_VECTORS_H */
diff -r 83e4c180f094 -r 50f2558d6241 xen/include/xen/cpumask.h
--- a/xen/include/xen/cpumask.h Fri Apr 28 14:04:56 2006 +0100
+++ b/xen/include/xen/cpumask.h Fri Apr 28 14:06:33 2006 +0100
@@ -111,6 +111,12 @@ static inline int __cpu_test_and_set(int
 static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
 {
        return test_and_set_bit(cpu, addr->bits);
+}
+
+#define cpu_test_and_clear(cpu, cpumask) __cpu_test_and_clear((cpu), 
&(cpumask))
+static inline int __cpu_test_and_clear(int cpu, cpumask_t *addr)
+{
+       return test_and_clear_bit(cpu, addr->bits);
 }
 
 #define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
diff -r 83e4c180f094 -r 50f2558d6241 xen/include/xen/serial.h
--- a/xen/include/xen/serial.h  Fri Apr 28 14:04:56 2006 +0100
+++ b/xen/include/xen/serial.h  Fri Apr 28 14:06:33 2006 +0100
@@ -57,6 +57,8 @@ struct uart_driver {
     void (*putc)(struct serial_port *, char);
     /* Get a character from the serial line: returns 0 if none available. */
     int  (*getc)(struct serial_port *, char *);
+    /* Get IRQ number for this port's serial line: returns -1 if none. */
+    int  (*irq)(struct serial_port *);
 };
 
 /* 'Serial handles' are composed from the following fields. */
@@ -99,6 +101,9 @@ void serial_end_sync(int handle);
 /* Return number of bytes headroom in transmit buffer. */
 int serial_tx_space(int handle);
 
+/* Return irq number for specified serial port (identified by index). */
+int serial_irq(int idx);
+
 /*
  * Initialisation and helper functions for uart drivers.
  */
diff -r 83e4c180f094 -r 50f2558d6241 
xen/include/asm-x86/mach-default/irq_vectors_limits.h
--- a/xen/include/asm-x86/mach-default/irq_vectors_limits.h     Fri Apr 28 
14:04:56 2006 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-#ifndef _ASM_IRQ_VECTORS_LIMITS_H
-#define _ASM_IRQ_VECTORS_LIMITS_H
-
-/* Limited by number of trap vectors. */
-#define NR_IRQS        FIRST_SYSTEM_VECTOR
-#define NR_IRQ_VECTORS NR_IRQS
-
-#endif /* _ASM_IRQ_VECTORS_LIMITS_H */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.