[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] x86: Make the hypercall PHYSDEVOP_alloc_irq_vector hypercall dummy.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1251801271 -3600
# Node ID a7de5bd776ca392f6059acd29771bddbcbc74ba1
# Parent  a020d0cdf594e36eac03c88abe061118d917c32f
x86: Make the hypercall PHYSDEVOP_alloc_irq_vector hypercall dummy.

This patch tends to make the hypercall PHYSDEVOP_alloc_irq_vector
dummy, and defer vector allocation to programe ioapic entries by
dom0. Basically, dom0 shouldn't touch vector namespace which is only
used by hypervisor for servicing real device's interrupts. And this
patch also makes broken NetBSD dom0 work again.

Signed-off-by: Xiantao Zhang <xiantao.zhang.intel.com>
---
 xen/arch/x86/io_apic.c |  236 +++++++++++++++++--------------------------------
 xen/arch/x86/irq.c     |    4 
 xen/arch/x86/mpparse.c |    4 
 xen/arch/x86/physdev.c |   26 +----
 xen/common/domain.c    |   10 +-
 5 files changed, 103 insertions(+), 177 deletions(-)

diff -r a020d0cdf594 -r a7de5bd776ca xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Tue Sep 01 11:32:47 2009 +0100
+++ b/xen/arch/x86/io_apic.c    Tue Sep 01 11:34:31 2009 +0100
@@ -85,35 +85,7 @@ static struct irq_pin_list {
     unsigned int next;
 } *irq_2_pin;
 
-static int *pin_irq_map;
-
 static unsigned int irq_2_pin_free_entry;
-
-/* Use an arry to record pin_2_irq_mapping */
-static int get_irq_from_apic_pin(int apic, int pin)
-{
-    int i, pin_base = 0;
-
-    ASSERT(apic < nr_ioapics);
-    
-    for (i = 0; i < apic; i++)
-        pin_base += nr_ioapic_registers[i];
-
-    return pin_irq_map[pin_base + pin];
-}
-
-static void set_irq_to_apic_pin(int apic, int pin, int irq)
-{
-    
-    int i, pin_base = 0;
-
-    ASSERT(apic < nr_ioapics);
-    
-    for (i = 0; i < apic; i++)
-        pin_base += nr_ioapic_registers[i];
-
-    pin_irq_map[pin_base + pin] = irq;
-}
 
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -141,42 +113,6 @@ static void add_pin_to_irq(unsigned int 
     }
     entry->apic = apic;
     entry->pin = pin;
-
-    set_irq_to_apic_pin(apic, pin, irq);
-}
-
-static void remove_pin_at_irq(unsigned int irq, int apic, int pin)
-{
-    struct irq_pin_list *entry, *prev;
-
-    for (entry = &irq_2_pin[irq]; ; entry = &irq_2_pin[entry->next]) {
-        if ((entry->apic == apic) && (entry->pin == pin))
-            break;
-        if (!entry->next)
-            BUG();
-    }
-
-    entry->pin = entry->apic = -1;
-    
-    if (entry != &irq_2_pin[irq]) {
-        /* Removed entry is not at head of list. */
-        prev = &irq_2_pin[irq];
-        while (&irq_2_pin[prev->next] != entry)
-            prev = &irq_2_pin[prev->next];
-        prev->next = entry->next;
-        entry->next = irq_2_pin_free_entry;
-        irq_2_pin_free_entry = entry - irq_2_pin;
-    } else if (entry->next != 0) {
-        /* Removed entry is at head of multi-item list. */
-        prev  = entry;
-        entry = &irq_2_pin[entry->next];
-        *prev = *entry;
-        entry->pin = entry->apic = -1;
-        entry->next = irq_2_pin_free_entry;
-        irq_2_pin_free_entry = entry - irq_2_pin;
-    }
-
-    set_irq_to_apic_pin(apic, pin, -1);
 }
 
 /*
@@ -1138,15 +1074,11 @@ static void __init enable_IO_APIC(void)
     /* Initialise dynamic irq_2_pin free list. */
     irq_2_pin = xmalloc_array(struct irq_pin_list, PIN_MAP_SIZE);
     memset(irq_2_pin, 0, PIN_MAP_SIZE * sizeof(*irq_2_pin));
-    pin_irq_map = xmalloc_array(int, nr_irqs_gsi);
-    memset(pin_irq_map, 0, nr_irqs_gsi * sizeof(int));
         
     for (i = 0; i < PIN_MAP_SIZE; i++)
         irq_2_pin[i].pin = -1;
     for (i = irq_2_pin_free_entry = nr_irqs_gsi; i < PIN_MAP_SIZE; i++)
         irq_2_pin[i].next = i + 1;
-    for (i = 0; i < nr_irqs_gsi; i++)
-        pin_irq_map[i] = -1;
 
     for(apic = 0; apic < nr_ioapics; apic++) {
         int pin;
@@ -2165,6 +2097,24 @@ static int ioapic_physbase_to_id(unsigne
     return -EINVAL;
 }
 
+unsigned apic_gsi_base(int apic);
+
+int apic_pin_2_gsi_irq(int apic, int pin)
+{
+    int idx, irq;
+
+    if (apic < 0)
+       return -EINVAL;
+
+    irq = apic_gsi_base(apic) + pin;
+    if (apic == 0) {
+        idx = find_irq_entry(apic, pin, mp_INT);
+        if (idx >= 0)
+            irq = pin_2_irq(idx, apic, pin);
+    }
+    return irq;
+}
+
 int ioapic_guest_read(unsigned long physbase, unsigned int reg, u32 *pval)
 {
     int apic;
@@ -2182,17 +2132,19 @@ int ioapic_guest_read(unsigned long phys
 
 #define WARN_BOGUS_WRITE(f, a...)                                       \
     dprintk(XENLOG_INFO, "\n%s: "                                        \
-            "apic=%d, pin=%d, old_irq=%d, new_irq=%d\n"                 \
-            "%s: old_entry=%08x, new_entry=%08x\n"                      \
-            "%s: " f, __FUNCTION__, apic, pin, old_irq, new_irq,        \
-            __FUNCTION__, *(u32 *)&old_rte, *(u32 *)&new_rte,           \
+            "apic=%d, pin=%d, irq=%d\n"                 \
+            "%s: new_entry=%08x\n"                      \
+            "%s: " f, __FUNCTION__, apic, pin, irq,        \
+            __FUNCTION__, *(u32 *)&rte,           \
             __FUNCTION__ , ##a )
 
 int ioapic_guest_write(unsigned long physbase, unsigned int reg, u32 val)
 {
-    int apic, pin, old_irq = -1, new_irq = -1;
-    struct IO_APIC_route_entry old_rte = { 0 }, new_rte = { 0 };
+    int apic, pin, irq, ret, vector, pirq;
+    struct IO_APIC_route_entry rte = { 0 };
     unsigned long flags;
+    struct irq_cfg *cfg;
+    struct irq_desc *desc;
 
     if ( (apic = ioapic_physbase_to_id(physbase)) < 0 )
         return apic;
@@ -2204,7 +2156,7 @@ int ioapic_guest_write(unsigned long phy
     pin = (reg - 0x10) >> 1;
 
     /* Write first half from guest; second half is target info. */
-    *(u32 *)&new_rte = val;
+    *(u32 *)&rte = val;
 
     /*
      * What about weird destination types?
@@ -2214,7 +2166,7 @@ int ioapic_guest_write(unsigned long phy
      *  ExtINT: Ignore? Linux only asserts this at start of day.
      * For now, print a message and return an error. We can fix up on demand.
      */
-    if ( new_rte.delivery_mode > dest_LowestPrio )
+    if ( rte.delivery_mode > dest_LowestPrio )
     {
         printk("ERROR: Attempt to write weird IOAPIC destination mode!\n");
         printk("       APIC=%d/%d, lo-reg=%x\n", apic, pin, val);
@@ -2225,83 +2177,65 @@ int ioapic_guest_write(unsigned long phy
      * The guest does not know physical APIC arrangement (flat vs. cluster).
      * Apply genapic conventions for this platform.
      */
-    new_rte.delivery_mode = INT_DELIVERY_MODE;
-    new_rte.dest_mode     = INT_DEST_MODE;
+    rte.delivery_mode = INT_DELIVERY_MODE;
+    rte.dest_mode     = INT_DEST_MODE;
+
+    irq = apic_pin_2_gsi_irq(apic, pin);
+    if ( irq < 0 )
+        return irq;
+
+    desc = irq_to_desc(irq);
+    cfg = desc->chip_data;
+
+    /* Since PHYSDEVOP_alloc_irq_vector is dummy, rte.vector is the pirq
+       which corresponds to this ioapic pin, retrieve it for building
+       pirq and irq mapping. 
+     */
+    pirq = rte.vector;
+    if(pirq < 0 || pirq >= dom0->nr_pirqs)
+        return -EINVAL;
+    
+    if ( desc->action )
+    {
+        WARN_BOGUS_WRITE("Attempt to modify IO-APIC pin for in-use IRQ!\n");
+        return 0;
+    }
+
+    if ( cfg->vector <= 0 || cfg->vector > LAST_DYNAMIC_VECTOR ) {
+
+        printk("allocated vector for irq:%d\n", irq);
+        
+        vector = assign_irq_vector(irq);
+        if ( vector < 0 )
+            return vector;
+
+        add_pin_to_irq(irq, apic, pin);
+    }
+    spin_lock(&pcidevs_lock);
+    spin_lock(&dom0->event_lock);
+    ret = map_domain_pirq(dom0, pirq, irq,
+            MAP_PIRQ_TYPE_GSI, NULL);
+    spin_unlock(&dom0->event_lock);
+    spin_unlock(&pcidevs_lock);
+    if ( ret < 0 )
+        return ret;
 
     spin_lock_irqsave(&ioapic_lock, flags);
-
-    /* Read first (interesting) half of current routing entry. */
-    *(u32 *)&old_rte = io_apic_read(apic, 0x10 + 2 * pin);
-
-    /* No change to the first half of the routing entry? Bail quietly. */
-    if ( *(u32 *)&old_rte == *(u32 *)&new_rte )
-    {
-        spin_unlock_irqrestore(&ioapic_lock, flags);
-        return 0;
-    }
-
-    /* Special delivery modes (SMI,NMI,INIT,ExtInt) should have no vector.  */
-    if ( (old_rte.delivery_mode > dest_LowestPrio) && (old_rte.vector != 0) )
-    {
-        WARN_BOGUS_WRITE("Special delivery mode %d with non-zero vector "
-                         "%02x\n", old_rte.delivery_mode, old_rte.vector);
-        /* Nobble the vector here as it does not relate to a valid irq. */
-        old_rte.vector = 0;
-    }
-
-    if ( old_rte.vector >= FIRST_DYNAMIC_VECTOR )
-        old_irq = get_irq_from_apic_pin(apic, pin);
-
-    /* FIXME: dirty hack to support per-cpu vector. */
-    new_irq = new_rte.vector;
-
-    if ( (old_irq != new_irq) && (old_irq >= 0) && IO_APIC_IRQ(old_irq) )
-    {
-        if ( irq_desc[old_irq].action )
-        {
-            WARN_BOGUS_WRITE("Attempt to remove IO-APIC pin of in-use IRQ!\n");
-            spin_unlock_irqrestore(&ioapic_lock, flags);
-            return 0;
-        }
-
-        remove_pin_at_irq(old_irq, apic, pin);
-    }
-
-    if ( (new_irq >= 0) && IO_APIC_IRQ(new_irq) )
-    {
-        if ( irq_desc[new_irq].action )
-        {
-            WARN_BOGUS_WRITE("Attempt to %s IO-APIC pin for in-use IRQ!\n",
-                             (old_irq != new_irq) ? "add" : "modify");
-            spin_unlock_irqrestore(&ioapic_lock, flags);
-            return 0;
-        }
-        
-        /* Set the correct irq-handling type. */
-        irq_desc[new_irq].handler = new_rte.trigger ? 
-            &ioapic_level_type: &ioapic_edge_type;
-        
-        if ( old_irq != new_irq )
-            add_pin_to_irq(new_irq, apic, pin);
-
-        /* Mask iff level triggered. */
-        new_rte.mask = new_rte.trigger;
-        /* Set the vector field to the real vector! */
-        new_rte.vector = irq_cfg[new_irq].vector;
-    }
-    else if ( !new_rte.mask )
-    {
-        /* This pin leads nowhere but the guest has not masked it. */
-        WARN_BOGUS_WRITE("Installing bogus unmasked IO-APIC entry!\n");
-        new_rte.mask = 1;
-    }
-
-    new_rte.dest.logical.logical_dest = 
-    cpu_mask_to_apicid(irq_cfg[new_irq].domain);
-
-    io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&new_rte) + 0));
-    io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&new_rte) + 1));
-
+    /* Set the correct irq-handling type. */
+    desc->handler = rte.trigger ? 
+        &ioapic_level_type: &ioapic_edge_type;
+
+    /* Mask iff level triggered. */
+    rte.mask = rte.trigger;
+    /* Set the vector field to the real vector! */
+    rte.vector = cfg->vector;
+
+    rte.dest.logical.logical_dest = 
+    cpu_mask_to_apicid(cfg->domain);
+
+    io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&rte) + 0));
+    io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&rte) + 1));
+    
     spin_unlock_irqrestore(&ioapic_lock, flags);
 
     return 0;
diff -r a020d0cdf594 -r a7de5bd776ca xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Tue Sep 01 11:32:47 2009 +0100
+++ b/xen/arch/x86/irq.c        Tue Sep 01 11:34:31 2009 +0100
@@ -1293,9 +1293,9 @@ int map_domain_pirq(
     if ( (old_irq && (old_irq != irq) ) ||
          (old_pirq && (old_pirq != pirq)) )
     {
-        dprintk(XENLOG_G_ERR, "dom%d: pirq %d or irq %d already mapped\n",
+        dprintk(XENLOG_G_WARNING, "dom%d: pirq %d or irq %d already mapped\n",
                 d->domain_id, pirq, irq);
-        return -EINVAL;
+        return 0;
     }
 
     ret = irq_permit_access(d, pirq);
diff -r a020d0cdf594 -r a7de5bd776ca xen/arch/x86/mpparse.c
--- a/xen/arch/x86/mpparse.c    Tue Sep 01 11:32:47 2009 +0100
+++ b/xen/arch/x86/mpparse.c    Tue Sep 01 11:34:31 2009 +0100
@@ -958,6 +958,10 @@ unsigned highest_gsi(void)
     return res;
 }
 
+unsigned apic_gsi_base(int apic)
+{
+    return mp_ioapic_routing[apic].gsi_base;
+}
 
 void __init mp_override_legacy_irq (
        u8                      bus_irq,
diff -r a020d0cdf594 -r a7de5bd776ca xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Tue Sep 01 11:32:47 2009 +0100
+++ b/xen/arch/x86/physdev.c    Tue Sep 01 11:34:31 2009 +0100
@@ -329,7 +329,6 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
 
     case PHYSDEVOP_alloc_irq_vector: {
         struct physdev_irq irq_op;
-        int vector;
 
         ret = -EFAULT;
         if ( copy_from_guest(&irq_op, arg, 1) != 0 )
@@ -343,26 +342,13 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         if ( ret )
             break;
 
-        irq = irq_op.irq;
-        ret = -EINVAL;
+        /* Vector is only used by hypervisor, and dom0 shouldn't
+         touch it in its world, return irq_op.irq as the vecotr,
+         and make this hypercall dummy, and also defer the vector 
+         allocation when dom0 tries to programe ioapic entry. */
+        irq_op.vector = irq_op.irq;
+        ret = 0;
         
-        /* FIXME: Once dom0 breaks GSI IRQ limit, it is
-            a must to eliminate the limit here */
-        BUG_ON(irq >= 256);
-        
-        vector = assign_irq_vector(irq);
-        if (vector >= FIRST_DYNAMIC_VECTOR)
-            irq_op.vector = irq;
-        else
-            irq_op.vector = -ENOSPC;
-
-        spin_lock(&pcidevs_lock);
-        spin_lock(&dom0->event_lock);
-        ret = map_domain_pirq(dom0, irq_op.irq, irq,
-                              MAP_PIRQ_TYPE_GSI, NULL);
-        spin_unlock(&dom0->event_lock);
-        spin_unlock(&pcidevs_lock);
-
         if ( copy_to_guest(arg, &irq_op, 1) != 0 )
             ret = -EFAULT;
         break;
diff -r a020d0cdf594 -r a7de5bd776ca xen/common/domain.c
--- a/xen/common/domain.c       Tue Sep 01 11:32:47 2009 +0100
+++ b/xen/common/domain.c       Tue Sep 01 11:34:31 2009 +0100
@@ -197,7 +197,7 @@ struct vcpu *alloc_idle_vcpu(unsigned in
     return alloc_vcpu(d, vcpu_id, cpu_id);
 }
 
-static unsigned int extra_dom0_irqs, extra_domU_irqs = 8;
+static unsigned int extra_dom0_irqs = 256, extra_domU_irqs = 32;
 static void __init parse_extra_guest_irqs(const char *s)
 {
     if ( isdigit(*s) )
@@ -253,9 +253,11 @@ struct domain *domain_create(
         d->is_paused_by_controller = 1;
         atomic_inc(&d->pause_count);
 
-        d->nr_pirqs = (nr_irqs_gsi +
-                       (domid ? extra_domU_irqs :
-                        extra_dom0_irqs ?: nr_irqs_gsi));
+        if ( domid )
+            d->nr_pirqs = nr_irqs_gsi + extra_domU_irqs;
+        else
+            d->nr_pirqs = nr_irqs_gsi + extra_dom0_irqs;
+
         d->pirq_to_evtchn = xmalloc_array(u16, d->nr_pirqs);
         d->pirq_mask = xmalloc_array(
             unsigned long, BITS_TO_LONGS(d->nr_pirqs));

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.