[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] Merge



# HG changeset patch
# User Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
# Date 1308850495 -3600
# Node ID b240183197720129a8d83847bc5592d6dff3d530
# Parent  3dcb553f3ba9db88ddb6029dc8d8cfa424770188
# Parent  e2235fe267eb0bde557f11cfc89462a11092f2bd
Merge
---


diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/ia64/vmx/vmx_interrupt.c
--- a/xen/arch/ia64/vmx/vmx_interrupt.c Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/ia64/vmx/vmx_interrupt.c Thu Jun 23 18:34:55 2011 +0100
@@ -155,13 +155,13 @@
     /* dummy */
 }
 
-int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable)
+int msixtbl_pt_register(struct domain *d, struct pirq *pirq, uint64_t gtable)
 {
     /* dummy */
     return -ENOSYS;
 }
 
-void msixtbl_pt_unregister(struct domain *d, int pirq)
+void msixtbl_pt_unregister(struct domain *d, struct pirq *pirq)
 {
     /* dummy */
 }
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/ia64/xen/hypercall.c
--- a/xen/arch/ia64/xen/hypercall.c     Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/ia64/xen/hypercall.c     Thu Jun 23 18:34:55 2011 +0100
@@ -65,8 +65,11 @@
 {
        if ( pirq < 0 || pirq >= NR_IRQS )
                return -EINVAL;
-       if ( d->arch.pirq_eoi_map )
-               evtchn_unmask(d->pirq_to_evtchn[pirq]);
+       if ( d->arch.pirq_eoi_map ) {
+               spin_lock(&d->event_lock);
+               evtchn_unmask(pirq_to_evtchn(d, pirq));
+               spin_unlock(&d->event_lock);
+       }
        return pirq_guest_eoi(d, pirq);
 }
 
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c   Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/ia64/xen/irq.c   Thu Jun 23 18:34:55 2011 +0100
@@ -363,15 +363,17 @@
     irq_desc_t         *desc = &irq_desc[irq];
     irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
     struct domain      *d;
+    struct pirq        *pirq;
     int                 i, already_pending = 0;
 
     for ( i = 0; i < action->nr_guests; i++ )
     {
         d = action->guest[i];
+        pirq = pirq_info(d, irq);
         if ( (action->ack_type != ACKTYPE_NONE) &&
-             !test_and_set_bit(irq, &d->pirq_mask) )
+             !test_and_set_bool(pirq->masked) )
             action->in_flight++;
-               if ( hvm_do_IRQ_dpci(d, irq) )
+               if ( hvm_do_IRQ_dpci(d, pirq) )
                {
                        if ( action->ack_type == ACKTYPE_NONE )
                        {
@@ -379,7 +381,7 @@
                                desc->status |= IRQ_INPROGRESS; /* cleared 
during hvm eoi */
                        }
                }
-               else if ( send_guest_pirq(d, irq) &&
+               else if ( send_guest_pirq(d, pirq) &&
                                (action->ack_type == ACKTYPE_NONE) )
                {
                        already_pending++;
@@ -423,26 +425,23 @@
     return ACKTYPE_NONE;
 }
 
-int pirq_guest_eoi(struct domain *d, int irq)
+int pirq_guest_eoi(struct domain *d, struct pirq *pirq)
 {
     irq_desc_t *desc;
     irq_guest_action_t *action;
 
-    if ( (irq < 0) || (irq >= NR_IRQS) )
-        return -EINVAL;
-
     desc = &irq_desc[irq];
     spin_lock_irq(&desc->lock);
     action = (irq_guest_action_t *)desc->action;
 
     if ( action->ack_type == ACKTYPE_NONE )
     {
-        ASSERT(!test_bit(irq, d->pirq_mask));
+        ASSERT(!pirq->masked);
         stop_timer(&irq_guest_eoi_timer[irq]);
         _irq_guest_eoi(desc);
     }
 
-    if ( test_and_clear_bit(irq, &d->pirq_mask) && (--action->in_flight == 0) )
+    if ( test_and_clear_bool(pirq->masked) && (--action->in_flight == 0) )
     {
         ASSERT(action->ack_type == ACKTYPE_UNMASK);
         desc->handler->end(irq);
@@ -455,24 +454,28 @@
 
 int pirq_guest_unmask(struct domain *d)
 {
-    int            irq;
+    unsigned int pirq = 0, n, i;
+    struct pirq *pirqs[16];
     shared_info_t *s = d->shared_info;
 
-    for ( irq = find_first_bit(d->pirq_mask, NR_IRQS);
-          irq < NR_IRQS;
-          irq = find_next_bit(d->pirq_mask, NR_IRQS, irq+1) )
-    {
-        if ( !test_bit(d->pirq_to_evtchn[irq], &s->evtchn_mask[0]) )
-            pirq_guest_eoi(d, irq);
-
-    }
+    do {
+        n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
+                                   ARRAY_SIZE(pirqs));
+        for ( i = 0; i < n; ++i )
+        {
+            pirq = pirqs[i]->pirq;
+            if ( pirqs[i]->masked &&
+                 !test_bit(pirqs[i]->evtchn, &s->evtchn_mask[0]) )
+            pirq_guest_eoi(d, pirqs[i]);
+        }
+    } while ( ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
 
     return 0;
 }
 
-int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
+int pirq_guest_bind(struct vcpu *v, struct pirq *pirq, int will_share)
 {
-    irq_desc_t         *desc = &irq_desc[irq];
+    irq_desc_t         *desc = &irq_desc[pirq->pirq];
     irq_guest_action_t *action;
     unsigned long       flags;
     int                 rc = 0;
@@ -492,7 +495,7 @@
         {
             gdprintk(XENLOG_INFO,
                     "Cannot bind IRQ %d to guest. In use by '%s'.\n",
-                    irq, desc->action->name);
+                    pirq->pirq, desc->action->name);
             rc = -EBUSY;
             goto out;
         }
@@ -502,7 +505,7 @@
         {
             gdprintk(XENLOG_INFO,
                     "Cannot bind IRQ %d to guest. Out of memory.\n",
-                    irq);
+                    pirq->pirq);
             rc = -ENOMEM;
             goto out;
         }
@@ -515,7 +518,7 @@
         desc->depth = 0;
         desc->status |= IRQ_GUEST;
         desc->status &= ~IRQ_DISABLED;
-        desc->handler->startup(irq);
+        desc->handler->startup(pirq->pirq);
 
         /* Attempt to bind the interrupt target to the correct CPU. */
 #if 0 /* FIXME CONFIG_SMP ??? */
@@ -528,7 +531,7 @@
     {
         gdprintk(XENLOG_INFO,
                 "Cannot bind IRQ %d to guest. Will not share with others.\n",
-                irq);
+                pirq->pirq);
         rc = -EBUSY;
         goto out;
     }
@@ -537,7 +540,7 @@
     {
         gdprintk(XENLOG_INFO,
                 "Cannot bind IRQ %d to guest. Already at max share.\n",
-                irq);
+                pirq->pirq);
         rc = -EBUSY;
         goto out;
     }
@@ -545,16 +548,16 @@
     action->guest[action->nr_guests++] = v->domain;
 
     if ( action->ack_type != ACKTYPE_NONE )
-        set_pirq_eoi(v->domain, irq);
+        set_pirq_eoi(v->domain, pirq->pirq);
     else
-        clear_pirq_eoi(v->domain, irq);
+        clear_pirq_eoi(v->domain, pirq->pirq);
 
  out:
     spin_unlock_irqrestore(&desc->lock, flags);
     return rc;
 }
 
-void pirq_guest_unbind(struct domain *d, int irq)
+void pirq_guest_unbind(struct domain *d, int irq, struct pirq *pirq)
 {
     irq_desc_t         *desc = &irq_desc[irq];
     irq_guest_action_t *action;
@@ -572,7 +575,7 @@
     action->nr_guests--;
 
     if ( action->ack_type == ACKTYPE_UNMASK )
-        if ( test_and_clear_bit(irq, &d->pirq_mask) &&
+        if ( test_and_clear_bool(pirq->masked) &&
              (--action->in_flight == 0) )
             desc->handler->end(irq);
 
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/acpi/cpu_idle.c      Thu Jun 23 18:34:55 2011 +0100
@@ -42,6 +42,7 @@
 #include <xen/cpuidle.h>
 #include <xen/trace.h>
 #include <xen/sched-if.h>
+#include <xen/irq.h>
 #include <asm/cache.h>
 #include <asm/io.h>
 #include <asm/hpet.h>
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/bzimage.c
--- a/xen/arch/x86/bzimage.c    Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/bzimage.c    Thu Jun 23 18:34:55 2011 +0100
@@ -5,6 +5,7 @@
 #include <xen/string.h>
 #include <xen/types.h>
 #include <xen/decompress.h>
+#include <xen/libelf.h>
 #include <asm/bzimage.h>
 
 #define HEAPORDER 3
@@ -200,25 +201,36 @@
     return 1;
 }
 
-int __init bzimage_headroom(char *image_start, unsigned long image_length)
+static unsigned long __initdata orig_image_len;
+
+unsigned long __init bzimage_headroom(char *image_start,
+                                      unsigned long image_length)
 {
     struct setup_header *hdr = (struct setup_header *)image_start;
-    char *img;
-    int err, headroom;
+    int err;
+    unsigned long headroom;
 
     err = bzimage_check(hdr, image_length);
-    if (err < 1)
+    if ( err < 0 )
         return 0;
 
-    img = image_start + (hdr->setup_sects+1) * 512;
-    img += hdr->payload_offset;
+    if ( err > 0 )
+    {
+        image_start += (hdr->setup_sects + 1) * 512 + hdr->payload_offset;
+        image_length = hdr->payload_length;
+    }
 
-    headroom = output_length(img, hdr->payload_length);
-    if (gzip_check(img, hdr->payload_length)) {
+    if ( elf_is_elfbinary(image_start) )
+        return 0;
+
+    orig_image_len = image_length;
+    headroom = output_length(image_start, image_length);
+    if (gzip_check(image_start, image_length))
+    {
         headroom += headroom >> 12; /* Add 8 bytes for every 32K input block */
         headroom += (32768 + 18); /* Add 32K + 18 bytes of extra headroom */
     } else
-        headroom += hdr->payload_length;
+        headroom += image_length;
     headroom = (headroom + 4095) & ~4095;
 
     return headroom;
@@ -230,18 +242,24 @@
     int err = bzimage_check(hdr, *image_len);
     unsigned long output_len;
 
-    if (err < 1)
+    if ( err < 0 )
         return err;
 
+    if ( err > 0 )
+    {
+        *image_start += (hdr->setup_sects + 1) * 512 + hdr->payload_offset;
+        *image_len = hdr->payload_length;
+    }
+
+    if ( elf_is_elfbinary(*image_start) )
+        return 0;
+
     BUG_ON(!(image_base < *image_start));
 
-    *image_start += (hdr->setup_sects+1) * 512;
-    *image_start += hdr->payload_offset;
-    *image_len = hdr->payload_length;
-    output_len = output_length(*image_start, *image_len);
+    output_len = output_length(*image_start, orig_image_len);
 
-    if ( (err = perform_gunzip(image_base, *image_start, *image_len)) > 0 )
-        err = decompress(*image_start, *image_len, image_base);
+    if ( (err = perform_gunzip(image_base, *image_start, orig_image_len)) > 0 )
+        err = decompress(*image_start, orig_image_len, image_base);
 
     if ( !err )
     {
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/dmi_scan.c
--- a/xen/arch/x86/dmi_scan.c   Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/dmi_scan.c   Thu Jun 23 18:34:55 2011 +0100
@@ -10,11 +10,31 @@
 #include <asm/system.h>
 #include <xen/dmi.h>
 
-#define bt_ioremap(b,l)  ((u8 *)__acpi_map_table(b,l))
+#define bt_ioremap(b,l)  ((void *)__acpi_map_table(b,l))
 #define bt_iounmap(b,l)  ((void)0)
 #define memcpy_fromio    memcpy
 #define alloc_bootmem(l) xmalloc_bytes(l)
 
+struct dmi_eps {
+       char anchor[5];                 /* "_DMI_" */
+       u8 checksum;
+       u16 size;
+       u32 address;
+       u16 num_structures;
+       u8 revision;
+} __attribute__((packed));
+
+struct smbios_eps {
+       char anchor[4];                 /* "_SM_" */
+       u8 checksum;
+       u8 length;
+       u8 major, minor;
+       u16 max_size;
+       u8 revision;
+       u8 _rsrvd_[5];
+       struct dmi_eps dmi;
+} __attribute__((packed));
+
 struct dmi_header
 {
        u8      type;
@@ -90,62 +110,70 @@
 }
 
 
-inline static int __init dmi_checksum(u8 *buf)
+static inline bool_t __init dmi_checksum(const void __iomem *buf,
+                                        unsigned int len)
 {
-       u8 sum=0;
-       int a;
+       u8 sum = 0;
+       const u8 *p = buf;
+       unsigned int a;
        
-       for(a=0; a<15; a++)
-               sum+=buf[a];
-       return (sum==0);
+       for (a = 0; a < len; a++)
+               sum += p[a];
+       return sum == 0;
 }
 
 int __init dmi_get_table(u32 *base, u32 *len)
 {
-       u8 buf[15];
+       struct dmi_eps eps;
        char __iomem *p, *q;
 
        p = maddr_to_virt(0xF0000);
        for (q = p; q < p + 0x10000; q += 16) {
-               memcpy_fromio(buf, q, 15);
-               if (memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf)) {
-                       *base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8];
-                       *len=buf[7]<<8|buf[6];
+               memcpy_fromio(&eps, q, 15);
+               if (memcmp(eps.anchor, "_DMI_", 5) == 0 &&
+                   dmi_checksum(&eps, sizeof(eps))) {
+                       *base = eps.address;
+                       *len = eps.size;
                        return 0;
                }
        }
        return -1;
 }
 
+static int __init _dmi_iterate(const struct dmi_eps *dmi,
+                              const struct smbios_eps __iomem *smbios,
+                              void (*decode)(struct dmi_header *))
+{
+       u16 num = dmi->num_structures;
+       u16 len = dmi->size;
+       u32 base = dmi->address;
+
+       /*
+        * DMI version 0.0 means that the real version is taken from
+        * the SMBIOS version, which we may not know at this point.
+        */
+       if (dmi->revision)
+               printk(KERN_INFO "DMI %d.%d present.\n",
+                      dmi->revision >> 4,  dmi->revision & 0x0f);
+       else if (!smbios)
+               printk(KERN_INFO "DMI present.\n");
+       dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
+                   num, len));
+       dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base));
+       return dmi_table(base, len, num, decode);
+}
+
 static int __init dmi_iterate(void (*decode)(struct dmi_header *))
 {
-       u8 buf[15];
+       struct dmi_eps eps;
        char __iomem *p, *q;
 
        p = maddr_to_virt(0xF0000);
        for (q = p; q < p + 0x10000; q += 16) {
-               memcpy_fromio(buf, q, 15);
-               if (memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf)) {
-                       u16 num=buf[13]<<8|buf[12];
-                       u16 len=buf[7]<<8|buf[6];
-                       u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8];
-
-                       /*
-                        * DMI version 0.0 means that the real version is taken 
from
-                        * the SMBIOS version, which we don't know at this 
point.
-                        */
-                       if(buf[14]!=0)
-                               printk(KERN_INFO "DMI %d.%d present.\n",
-                                       buf[14]>>4, buf[14]&0x0F);
-                       else
-                               printk(KERN_INFO "DMI present.\n");
-                       dmi_printk((KERN_INFO "%d structures occupying %d 
bytes.\n",
-                               num, len));
-                       dmi_printk((KERN_INFO "DMI table at 0x%08X.\n",
-                               base));
-                       if(dmi_table(base,len, num, decode)==0)
-                               return 0;
-               }
+               memcpy_fromio(&eps, q, sizeof(eps));
+               if (memcmp(eps.anchor, "_DMI_", 5) == 0 &&
+                   dmi_checksum(&eps, sizeof(eps)))
+                       return _dmi_iterate(&eps, NULL, decode);
        }
        return -1;
 }
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/domain.c     Thu Jun 23 18:34:55 2011 +0100
@@ -591,25 +591,9 @@
         share_xen_page_with_guest(
             virt_to_page(d->shared_info), d, XENSHARE_writable);
 
-        d->arch.pirq_irq = xmalloc_array(int, d->nr_pirqs);
-        if ( !d->arch.pirq_irq )
-            goto fail;
-        memset(d->arch.pirq_irq, 0,
-               d->nr_pirqs * sizeof(*d->arch.pirq_irq));
-
         if ( (rc = init_domain_irq_mapping(d)) != 0 )
             goto fail;
 
-        if ( is_hvm_domain(d) )
-        {
-            d->arch.pirq_emuirq = xmalloc_array(int, d->nr_pirqs);
-            if ( !d->arch.pirq_emuirq )
-                goto fail;
-            for (i = 0; i < d->nr_pirqs; i++)
-                d->arch.pirq_emuirq[i] = IRQ_UNBOUND;
-        }
-
-
         if ( (rc = iommu_domain_init(d)) != 0 )
             goto fail;
 
@@ -643,8 +627,6 @@
  fail:
     d->is_dying = DOMDYING_dead;
     vmce_destroy_msr(d);
-    xfree(d->arch.pirq_irq);
-    xfree(d->arch.pirq_emuirq);
     cleanup_domain_irq_mapping(d);
     free_xenheap_page(d->shared_info);
     if ( paging_initialised )
@@ -697,8 +679,6 @@
 #endif
 
     free_xenheap_page(d->shared_info);
-    xfree(d->arch.pirq_irq);
-    xfree(d->arch.pirq_emuirq);
     cleanup_domain_irq_mapping(d);
 }
 
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/hvm/hvm.c    Thu Jun 23 18:34:55 2011 +0100
@@ -249,32 +249,36 @@
     pt_migrate(v);
 }
 
-void hvm_migrate_pirqs(struct vcpu *v)
+static int hvm_migrate_pirq(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
+                            void *arg)
 {
-    int pirq, irq;
-    struct irq_desc *desc;
-    struct domain *d = v->domain;
-    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
-    
-    if ( !iommu_enabled || (hvm_irq_dpci == NULL) )
-       return;
-
-    spin_lock(&d->event_lock);
-    for ( pirq = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs);
-          pirq < d->nr_pirqs;
-          pirq = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, pirq + 1) )
+    struct vcpu *v = arg;
+
+    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) &&
+         (pirq_dpci->gmsi.dest_vcpu_id == v->vcpu_id) )
     {
-        if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI) ||
-               (hvm_irq_dpci->mirq[pirq].gmsi.dest_vcpu_id != v->vcpu_id) )
-            continue;
-        desc = domain_spin_lock_irq_desc(v->domain, pirq, NULL);
-        if (!desc)
-            continue;
-        irq = desc - irq_desc;
-        ASSERT(MSI_IRQ(irq));
+        struct irq_desc *desc =
+            pirq_spin_lock_irq_desc(d, dpci_pirq(pirq_dpci), NULL);
+
+        if ( !desc )
+            return 0;
+        ASSERT(MSI_IRQ(desc - irq_desc));
         irq_set_affinity(desc, cpumask_of(v->processor));
         spin_unlock_irq(&desc->lock);
     }
+
+    return 0;
+}
+
+void hvm_migrate_pirqs(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+
+    if ( !iommu_enabled || !d->arch.hvm_domain.irq.dpci )
+       return;
+
+    spin_lock(&d->event_lock);
+    pt_pirq_iterate(d, hvm_migrate_pirq, v);
     spin_unlock(&d->event_lock);
 }
 
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c    Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/hvm/irq.c    Thu Jun 23 18:34:55 2011 +0100
@@ -31,7 +31,9 @@
 /* Must be called with hvm_domain->irq_lock hold */
 static void assert_irq(struct domain *d, unsigned ioapic_gsi, unsigned pic_irq)
 {
-    int pirq = domain_emuirq_to_pirq(d, ioapic_gsi);
+    struct pirq *pirq =
+        pirq_info(d, domain_emuirq_to_pirq(d, ioapic_gsi));
+
     if ( hvm_domain_use_pirq(d, pirq) )
     {
         send_guest_pirq(d, pirq);
@@ -44,7 +46,9 @@
 /* Must be called with hvm_domain->irq_lock hold */
 static void deassert_irq(struct domain *d, unsigned isa_irq)
 {
-    int pirq = domain_emuirq_to_pirq(d, isa_irq);
+    struct pirq *pirq =
+        pirq_info(d, domain_emuirq_to_pirq(d, isa_irq));
+
     if ( !hvm_domain_use_pirq(d, pirq) )
         vpic_irq_negative_edge(d, isa_irq);
 }
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/hvm/svm/vpmu.c
--- a/xen/arch/x86/hvm/svm/vpmu.c       Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/hvm/svm/vpmu.c       Thu Jun 23 18:34:55 2011 +0100
@@ -26,6 +26,7 @@
 #include <xen/xenoprof.h>
 #include <xen/hvm/save.h>
 #include <xen/sched.h>
+#include <xen/irq.h>
 #include <asm/apic.h>
 #include <asm/hvm/vlapic.h>
 #include <asm/hvm/vpmu.h>
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/hvm/vmsi.c
--- a/xen/arch/x86/hvm/vmsi.c   Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/hvm/vmsi.c   Thu Jun 23 18:34:55 2011 +0100
@@ -32,6 +32,7 @@
 #include <xen/lib.h>
 #include <xen/errno.h>
 #include <xen/sched.h>
+#include <xen/irq.h>
 #include <public/hvm/ioreq.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/vpic.h>
@@ -110,11 +111,10 @@
     return 1;
 }
 
-int vmsi_deliver_pirq(struct domain *d, int pirq)
+int vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
 {
-    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
-    uint32_t flags = hvm_irq_dpci->mirq[pirq].gmsi.gflags;
-    int vector = hvm_irq_dpci->mirq[pirq].gmsi.gvec;
+    uint32_t flags = pirq_dpci->gmsi.gflags;
+    int vector = pirq_dpci->gmsi.gvec;
     uint8_t dest = (uint8_t)flags;
     uint8_t dest_mode = !!(flags & VMSI_DM_MASK);
     uint8_t delivery_mode = (flags & VMSI_DELIV_MASK)
@@ -126,11 +126,7 @@
                 "vector=%x trig_mode=%x\n",
                 dest, dest_mode, delivery_mode, vector, trig_mode);
 
-    if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_GUEST_MSI) )
-    {
-        gdprintk(XENLOG_WARNING, "pirq %x not msi \n", pirq);
-        return 0;
-    }
+    ASSERT(pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI);
 
     vmsi_deliver(d, vector, dest, dest_mode, delivery_mode, trig_mode);
     return 1;
@@ -360,7 +356,7 @@
     call_rcu(&entry->rcu, free_msixtbl_entry);
 }
 
-int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable)
+int msixtbl_pt_register(struct domain *d, struct pirq *pirq, uint64_t gtable)
 {
     struct irq_desc *irq_desc;
     struct msi_desc *msi_desc;
@@ -369,6 +365,7 @@
     int r = -EINVAL;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
+    ASSERT(spin_is_locked(&d->event_lock));
 
     /*
      * xmalloc() with irq_disabled causes the failure of check_lock() 
@@ -378,7 +375,7 @@
     if ( !new_entry )
         return -ENOMEM;
 
-    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+    irq_desc = pirq_spin_lock_irq_desc(d, pirq, NULL);
     if ( !irq_desc )
     {
         xfree(new_entry);
@@ -415,7 +412,7 @@
     return r;
 }
 
-void msixtbl_pt_unregister(struct domain *d, int pirq)
+void msixtbl_pt_unregister(struct domain *d, struct pirq *pirq)
 {
     struct irq_desc *irq_desc;
     struct msi_desc *msi_desc;
@@ -423,8 +420,9 @@
     struct msixtbl_entry *entry;
 
     ASSERT(spin_is_locked(&pcidevs_lock));
+    ASSERT(spin_is_locked(&d->event_lock));
 
-    irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+    irq_desc = pirq_spin_lock_irq_desc(d, pirq, NULL);
     if ( !irq_desc )
         return;
 
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/hvm/vmx/realmode.c
--- a/xen/arch/x86/hvm/vmx/realmode.c   Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/hvm/vmx/realmode.c   Thu Jun 23 18:34:55 2011 +0100
@@ -14,6 +14,7 @@
 #include <xen/lib.h>
 #include <xen/sched.h>
 #include <xen/paging.h>
+#include <xen/softirq.h>
 #include <asm/event.h>
 #include <asm/hvm/emulate.h>
 #include <asm/hvm/hvm.h>
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/hvm/vmx/vpmu_core2.c
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c Thu Jun 23 18:34:55 2011 +0100
@@ -22,6 +22,7 @@
 #include <xen/config.h>
 #include <xen/sched.h>
 #include <xen/xenoprof.h>
+#include <xen/irq.h>
 #include <asm/system.h>
 #include <asm/regs.h>
 #include <asm/types.h>
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/irq.c        Thu Jun 23 18:34:55 2011 +0100
@@ -814,7 +814,7 @@
         {
             struct domain *d = action->guest[i];
             unsigned int pirq = domain_irq_to_pirq(d, irq);
-            if ( test_and_clear_bit(pirq, d->pirq_mask) )
+            if ( test_and_clear_bool(pirq_info(d, pirq)->masked) )
                 action->in_flight--;
         }
     }
@@ -874,11 +874,12 @@
 
     for ( i = 0; i < action->nr_guests; i++ )
     {
-        unsigned int pirq;
+        struct pirq *pirq;
+
         d = action->guest[i];
-        pirq = domain_irq_to_pirq(d, irq);
+        pirq = pirq_info(d, domain_irq_to_pirq(d, irq));
         if ( (action->ack_type != ACKTYPE_NONE) &&
-             !test_and_set_bit(pirq, d->pirq_mask) )
+             !test_and_set_bool(pirq->masked) )
             action->in_flight++;
         if ( hvm_do_IRQ_dpci(d, pirq) )
         {
@@ -950,31 +951,74 @@
     return desc;
 }
 
-static int prepare_domain_irq_pirq(struct domain *d, int irq, int pirq)
+/*
+ * Same with struct pirq already looked up, and d->event_lock already
+ * held (thus the PIRQ <-> IRQ mapping can't change under our feet).
+ */
+struct irq_desc *pirq_spin_lock_irq_desc(
+    struct domain *d, const struct pirq *pirq, unsigned long *pflags)
+{
+    int irq = pirq->arch.irq;
+    struct irq_desc *desc;
+    unsigned long flags;
+
+    ASSERT(spin_is_locked(&d->event_lock));
+
+    if ( irq <= 0 )
+        return NULL;
+
+    desc = irq_to_desc(irq);
+    spin_lock_irqsave(&desc->lock, flags);
+
+    if ( pflags )
+        *pflags = flags;
+
+    ASSERT(pirq == pirq_info(d, domain_irq_to_pirq(d, irq)));
+    ASSERT(irq == pirq->arch.irq);
+
+    return desc;
+}
+
+static int prepare_domain_irq_pirq(struct domain *d, int irq, int pirq,
+                                struct pirq **pinfo)
 {
     int err = radix_tree_insert(&d->arch.irq_pirq, irq,
                                 radix_tree_int_to_ptr(0));
-    return (err != -EEXIST) ? err : 0;
+    struct pirq *info;
+
+    if ( err && err != -EEXIST )
+        return err;
+    info = pirq_get_info(d, pirq);
+    if ( !info )
+    {
+        if ( !err )
+            radix_tree_delete(&d->arch.irq_pirq, irq);
+        return -ENOMEM;
+    }
+    *pinfo = info;
+    return 0;
 }
 
-static void set_domain_irq_pirq(struct domain *d, int irq, int pirq)
+static void set_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq)
 {
     radix_tree_replace_slot(
         radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
-        radix_tree_int_to_ptr(pirq));
-    d->arch.pirq_irq[pirq] = irq;
+        radix_tree_int_to_ptr(pirq->pirq));
+    pirq->arch.irq = irq;
 }
 
-static void clear_domain_irq_pirq(struct domain *d, int irq, int pirq)
+static void clear_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq)
 {
-    d->arch.pirq_irq[pirq] = 0;
+    pirq->arch.irq = 0;
     radix_tree_replace_slot(
         radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
         radix_tree_int_to_ptr(0));
 }
 
-static void cleanup_domain_irq_pirq(struct domain *d, int irq, int pirq)
+static void cleanup_domain_irq_pirq(struct domain *d, int irq,
+                                    struct pirq *pirq)
 {
+    pirq_cleanup_check(pirq, d);
     radix_tree_delete(&d->arch.irq_pirq, irq);
 }
 
@@ -989,12 +1033,14 @@
 
     for ( i = 1; platform_legacy_irq(i); ++i )
     {
+        struct pirq *info;
+
         if ( IO_APIC_IRQ(i) )
             continue;
-        err = prepare_domain_irq_pirq(d, i, i);
+        err = prepare_domain_irq_pirq(d, i, i, &info);
         if ( err )
             break;
-        set_domain_irq_pirq(d, i, i);
+        set_domain_irq_pirq(d, i, info);
     }
 
     if ( err )
@@ -1009,6 +1055,48 @@
         radix_tree_destroy(&d->arch.hvm_domain.emuirq_pirq, NULL);
 }
 
+struct pirq *alloc_pirq_struct(struct domain *d)
+{
+    size_t sz = is_hvm_domain(d) ? sizeof(struct pirq) :
+                                   offsetof(struct pirq, arch.hvm);
+    struct pirq *pirq = xmalloc_bytes(sz);
+
+    if ( pirq )
+    {
+        memset(pirq, 0, sz);
+        if ( is_hvm_domain(d) )
+        {
+            pirq->arch.hvm.emuirq = IRQ_UNBOUND;
+            pt_pirq_init(d, &pirq->arch.hvm.dpci);
+        }
+    }
+
+    return pirq;
+}
+
+void (pirq_cleanup_check)(struct pirq *pirq, struct domain *d)
+{
+    /*
+     * Check whether all fields have their default values, and delete
+     * the entry from the tree if so.
+     *
+     * NB: Common parts were already checked.
+     */
+    if ( pirq->arch.irq )
+        return;
+
+    if ( is_hvm_domain(d) )
+    {
+        if ( pirq->arch.hvm.emuirq != IRQ_UNBOUND )
+            return;
+        if ( !pt_pirq_cleanup_check(&pirq->arch.hvm.dpci) )
+            return;
+    }
+
+    if ( radix_tree_delete(&d->pirq_tree, pirq->pirq) != pirq )
+        BUG();
+}
+
 /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
 static void flush_ready_eoi(void)
 {
@@ -1069,18 +1157,22 @@
     flush_ready_eoi();
 }
 
-static void __pirq_guest_eoi(struct domain *d, int pirq)
+void pirq_guest_eoi(struct domain *d, struct pirq *pirq)
 {
-    struct irq_desc         *desc;
+    struct irq_desc *desc;
+
+    ASSERT(local_irq_is_enabled());
+    desc = pirq_spin_lock_irq_desc(d, pirq, NULL);
+    if ( desc )
+        desc_guest_eoi(d, desc, pirq);
+}
+
+void desc_guest_eoi(struct domain *d, struct irq_desc *desc, struct pirq *pirq)
+{
     irq_guest_action_t *action;
     cpumask_t           cpu_eoi_map;
     int                 irq;
 
-    ASSERT(local_irq_is_enabled());
-    desc = domain_spin_lock_irq_desc(d, pirq, NULL);
-    if ( desc == NULL )
-        return;
-
     if ( !(desc->status & IRQ_GUEST) )
     {
         spin_unlock_irq(&desc->lock);
@@ -1092,12 +1184,12 @@
 
     if ( action->ack_type == ACKTYPE_NONE )
     {
-        ASSERT(!test_bit(pirq, d->pirq_mask));
+        ASSERT(!pirq->masked);
         stop_timer(&action->eoi_timer);
         _irq_guest_eoi(desc);
     }
 
-    if ( unlikely(!test_and_clear_bit(pirq, d->pirq_mask)) ||
+    if ( unlikely(!test_and_clear_bool(pirq->masked)) ||
          unlikely(--action->in_flight != 0) )
     {
         spin_unlock_irq(&desc->lock);
@@ -1132,27 +1224,22 @@
         on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
 }
 
-int pirq_guest_eoi(struct domain *d, int irq)
-{
-    if ( (irq < 0) || (irq >= d->nr_pirqs) )
-        return -EINVAL;
-
-    __pirq_guest_eoi(d, irq);
-
-    return 0;
-}
-
 int pirq_guest_unmask(struct domain *d)
 {
-    unsigned int irq, nr = d->nr_pirqs;
+    unsigned int pirq = 0, n, i;
+    struct pirq *pirqs[16];
 
-    for ( irq = find_first_bit(d->pirq_mask, nr);
-          irq < nr;
-          irq = find_next_bit(d->pirq_mask, nr, irq+1) )
-    {
-        if ( !test_bit(d->pirq_to_evtchn[irq], &shared_info(d, evtchn_mask)) )
-            __pirq_guest_eoi(d, irq);
-    }
+    do {
+        n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
+                                   ARRAY_SIZE(pirqs));
+        for ( i = 0; i < n; ++i )
+        {
+            pirq = pirqs[i]->pirq;
+            if ( pirqs[i]->masked &&
+                 !test_bit(pirqs[i]->evtchn, &shared_info(d, evtchn_mask)) )
+                pirq_guest_eoi(d, pirqs[i]);
+        }
+    } while ( ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
 
     return 0;
 }
@@ -1222,7 +1309,7 @@
     return shared;
 }
 
-int pirq_guest_bind(struct vcpu *v, int pirq, int will_share)
+int pirq_guest_bind(struct vcpu *v, struct pirq *pirq, int will_share)
 {
     unsigned int        irq;
     struct irq_desc         *desc;
@@ -1234,7 +1321,7 @@
     BUG_ON(!local_irq_is_enabled());
 
  retry:
-    desc = domain_spin_lock_irq_desc(v->domain, pirq, NULL);
+    desc = pirq_spin_lock_irq_desc(v->domain, pirq, NULL);
     if ( desc == NULL )
     {
         rc = -EINVAL;
@@ -1250,7 +1337,7 @@
         {
             gdprintk(XENLOG_INFO,
                     "Cannot bind IRQ %d to guest. In use by '%s'.\n",
-                    pirq, desc->action->name);
+                    pirq->pirq, desc->action->name);
             rc = -EBUSY;
             goto unlock_out;
         }
@@ -1262,7 +1349,7 @@
                 goto retry;
             gdprintk(XENLOG_INFO,
                      "Cannot bind IRQ %d to guest. Out of memory.\n",
-                     pirq);
+                     pirq->pirq);
             rc = -ENOMEM;
             goto out;
         }
@@ -1274,7 +1361,7 @@
         action->nr_guests   = 0;
         action->in_flight   = 0;
         action->shareable   = will_share;
-        action->ack_type    = pirq_acktype(v->domain, pirq);
+        action->ack_type    = pirq_acktype(v->domain, pirq->pirq);
         cpus_clear(action->cpu_eoi_map);
         init_timer(&action->eoi_timer, irq_guest_eoi_timer_fn, desc, 0);
 
@@ -1291,7 +1378,7 @@
     else if ( !will_share || !action->shareable )
     {
         gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. %s.\n",
-                 pirq,
+                 pirq->pirq,
                  will_share ?
                  "Others do not share" :
                  "Will not share with others");
@@ -1314,7 +1401,7 @@
     if ( action->nr_guests == IRQ_MAX_GUESTS )
     {
         gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. "
-               "Already at max share.\n", pirq);
+               "Already at max share.\n", pirq->pirq);
         rc = -EBUSY;
         goto unlock_out;
     }
@@ -1322,9 +1409,9 @@
     action->guest[action->nr_guests++] = v->domain;
 
     if ( action->ack_type != ACKTYPE_NONE )
-        set_pirq_eoi(v->domain, pirq);
+        set_pirq_eoi(v->domain, pirq->pirq);
     else
-        clear_pirq_eoi(v->domain, pirq);
+        clear_pirq_eoi(v->domain, pirq->pirq);
 
  unlock_out:
     spin_unlock_irq(&desc->lock);
@@ -1335,7 +1422,7 @@
 }
 
 static irq_guest_action_t *__pirq_guest_unbind(
-    struct domain *d, int pirq, struct irq_desc *desc)
+    struct domain *d, struct pirq *pirq, struct irq_desc *desc)
 {
     unsigned int        irq;
     irq_guest_action_t *action;
@@ -1350,7 +1437,7 @@
     if ( unlikely(action == NULL) )
     {
         dprintk(XENLOG_G_WARNING, "dom%d: pirq %d: desc->action is NULL!\n",
-                d->domain_id, pirq);
+                d->domain_id, pirq->pirq);
         return NULL;
     }
 
@@ -1364,13 +1451,13 @@
     switch ( action->ack_type )
     {
     case ACKTYPE_UNMASK:
-        if ( test_and_clear_bit(pirq, d->pirq_mask) &&
+        if ( test_and_clear_bool(pirq->masked) &&
              (--action->in_flight == 0) )
             desc->handler->end(irq);
         break;
     case ACKTYPE_EOI:
         /* NB. If #guests == 0 then we clear the eoi_map later on. */
-        if ( test_and_clear_bit(pirq, d->pirq_mask) &&
+        if ( test_and_clear_bool(pirq->masked) &&
              (--action->in_flight == 0) &&
              (action->nr_guests != 0) )
         {
@@ -1388,9 +1475,9 @@
 
     /*
      * The guest cannot re-bind to this IRQ until this function returns. So,
-     * when we have flushed this IRQ from pirq_mask, it should remain flushed.
+     * when we have flushed this IRQ from ->masked, it should remain flushed.
      */
-    BUG_ON(test_bit(pirq, d->pirq_mask));
+    BUG_ON(pirq->masked);
 
     if ( action->nr_guests != 0 )
         return NULL;
@@ -1428,7 +1515,7 @@
     return action;
 }
 
-void pirq_guest_unbind(struct domain *d, int pirq)
+void pirq_guest_unbind(struct domain *d, struct pirq *pirq)
 {
     irq_guest_action_t *oldaction = NULL;
     struct irq_desc *desc;
@@ -1437,11 +1524,11 @@
     WARN_ON(!spin_is_locked(&d->event_lock));
 
     BUG_ON(!local_irq_is_enabled());
-    desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+    desc = pirq_spin_lock_irq_desc(d, pirq, NULL);
 
     if ( desc == NULL )
     {
-        irq = -domain_pirq_to_irq(d, pirq);
+        irq = -pirq->arch.irq;
         BUG_ON(irq <= 0);
         desc = irq_to_desc(irq);
         spin_lock_irq(&desc->lock);
@@ -1463,7 +1550,7 @@
         cleanup_domain_irq_pirq(d, irq, pirq);
 }
 
-static int pirq_guest_force_unbind(struct domain *d, int irq)
+static int pirq_guest_force_unbind(struct domain *d, struct pirq *pirq)
 {
     struct irq_desc *desc;
     irq_guest_action_t *action, *oldaction = NULL;
@@ -1472,7 +1559,7 @@
     WARN_ON(!spin_is_locked(&d->event_lock));
 
     BUG_ON(!local_irq_is_enabled());
-    desc = domain_spin_lock_irq_desc(d, irq, NULL);
+    desc = pirq_spin_lock_irq_desc(d, pirq, NULL);
     BUG_ON(desc == NULL);
 
     if ( !(desc->status & IRQ_GUEST) )
@@ -1482,7 +1569,7 @@
     if ( unlikely(action == NULL) )
     {
         dprintk(XENLOG_G_WARNING, "dom%d: pirq %d: desc->action is NULL!\n",
-            d->domain_id, irq);
+            d->domain_id, pirq->pirq);
         goto out;
     }
 
@@ -1492,7 +1579,7 @@
         goto out;
 
     bound = 1;
-    oldaction = __pirq_guest_unbind(d, irq, desc);
+    oldaction = __pirq_guest_unbind(d, pirq, desc);
 
  out:
     spin_unlock_irq(&desc->lock);
@@ -1506,6 +1593,13 @@
     return bound;
 }
 
+static inline bool_t is_free_pirq(const struct domain *d,
+                                  const struct pirq *pirq)
+{
+    return !pirq || (!pirq->arch.irq && (!is_hvm_domain(d) ||
+        pirq->arch.hvm.emuirq == IRQ_UNBOUND));
+}
+
 int get_free_pirq(struct domain *d, int type, int index)
 {
     int i;
@@ -1515,29 +1609,17 @@
     if ( type == MAP_PIRQ_TYPE_GSI )
     {
         for ( i = 16; i < nr_irqs_gsi; i++ )
-            if ( !d->arch.pirq_irq[i] )
-            {
-                if ( !is_hvm_domain(d) ||
-                        d->arch.pirq_emuirq[i] == IRQ_UNBOUND )
-                    break;
-            }
-        if ( i == nr_irqs_gsi )
-            return -ENOSPC;
+            if ( is_free_pirq(d, pirq_info(d, i)) )
+                return i;
     }
     else
     {
         for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; i-- )
-            if ( !d->arch.pirq_irq[i] )
-            {
-                if ( !is_hvm_domain(d) ||
-                        d->arch.pirq_emuirq[i] == IRQ_UNBOUND )
-                    break;
-            }
-        if ( i < nr_irqs_gsi )
-            return -ENOSPC;
+            if ( is_free_pirq(d, pirq_info(d, i)) )
+                return i;
     }
 
-    return i;
+    return -ENOSPC;
 }
 
 int map_domain_pirq(
@@ -1545,6 +1627,7 @@
 {
     int ret = 0;
     int old_irq, old_pirq;
+    struct pirq *info;
     struct irq_desc *desc;
     unsigned long flags;
     struct msi_desc *msi_desc;
@@ -1584,7 +1667,7 @@
         return ret;
     }
 
-    ret = prepare_domain_irq_pirq(d, irq, pirq);
+    ret = prepare_domain_irq_pirq(d, irq, pirq, &info);
     if ( ret )
         return ret;
 
@@ -1609,20 +1692,20 @@
             dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
               d->domain_id, irq);
         desc->handler = &pci_msi_type;
-        set_domain_irq_pirq(d, irq, pirq);
+        set_domain_irq_pirq(d, irq, info);
         setup_msi_irq(pdev, msi_desc, irq);
         spin_unlock_irqrestore(&desc->lock, flags);
     }
     else
     {
         spin_lock_irqsave(&desc->lock, flags);
-        set_domain_irq_pirq(d, irq, pirq);
+        set_domain_irq_pirq(d, irq, info);
         spin_unlock_irqrestore(&desc->lock, flags);
     }
 
  done:
     if ( ret )
-        cleanup_domain_irq_pirq(d, irq, pirq);
+        cleanup_domain_irq_pirq(d, irq, info);
     return ret;
 }
 
@@ -1633,6 +1716,7 @@
     struct irq_desc *desc;
     int irq, ret = 0;
     bool_t forced_unbind;
+    struct pirq *info;
     struct msi_desc *msi_desc = NULL;
 
     if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
@@ -1641,8 +1725,8 @@
     ASSERT(spin_is_locked(&pcidevs_lock));
     ASSERT(spin_is_locked(&d->event_lock));
 
-    irq = domain_pirq_to_irq(d, pirq);
-    if ( irq <= 0 )
+    info = pirq_info(d, pirq);
+    if ( !info || (irq = info->arch.irq) <= 0 )
     {
         dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
                 d->domain_id, pirq);
@@ -1650,7 +1734,7 @@
         goto done;
     }
 
-    forced_unbind = pirq_guest_force_unbind(d, pirq);
+    forced_unbind = pirq_guest_force_unbind(d, info);
     if ( forced_unbind )
         dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n",
                 d->domain_id, pirq);
@@ -1665,10 +1749,10 @@
     BUG_ON(irq != domain_pirq_to_irq(d, pirq));
 
     if ( !forced_unbind )
-        clear_domain_irq_pirq(d, irq, pirq);
+        clear_domain_irq_pirq(d, irq, info);
     else
     {
-        d->arch.pirq_irq[pirq] = -irq;
+        info->arch.irq = -irq;
         radix_tree_replace_slot(
             radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
             radix_tree_int_to_ptr(-pirq));
@@ -1679,7 +1763,7 @@
         msi_free_irq(msi_desc);
 
     if ( !forced_unbind )
-        cleanup_domain_irq_pirq(d, irq, pirq);
+        cleanup_domain_irq_pirq(d, irq, info);
 
     ret = irq_deny_access(d, pirq);
     if ( ret )
@@ -1701,7 +1785,7 @@
     spin_lock(&d->event_lock);
 
     for ( i = 0; i < d->nr_pirqs; i++ )
-        if ( d->arch.pirq_irq[i] > 0 )
+        if ( domain_pirq_to_irq(d, i) > 0 )
             unmap_domain_pirq(d, i);
 
     spin_unlock(&d->event_lock);
@@ -1715,6 +1799,7 @@
     struct irq_cfg *cfg;
     irq_guest_action_t *action;
     struct domain *d;
+    const struct pirq *info;
     unsigned long flags;
 
     printk("Guest interrupt information:\n");
@@ -1749,20 +1834,18 @@
             {
                 d = action->guest[i];
                 pirq = domain_irq_to_pirq(d, irq);
+                info = pirq_info(d, pirq);
                 printk("%u:%3d(%c%c%c%c)",
                        d->domain_id, pirq,
-                       (test_bit(d->pirq_to_evtchn[pirq],
+                       (test_bit(info->evtchn,
                                  &shared_info(d, evtchn_pending)) ?
                         'P' : '-'),
-                       (test_bit(d->pirq_to_evtchn[pirq] /
-                                 BITS_PER_EVTCHN_WORD(d),
+                       (test_bit(info->evtchn / BITS_PER_EVTCHN_WORD(d),
                                  &vcpu_info(d->vcpu[0], evtchn_pending_sel)) ?
                         'S' : '-'),
-                       (test_bit(d->pirq_to_evtchn[pirq],
-                                 &shared_info(d, evtchn_mask)) ?
+                       (test_bit(info->evtchn, &shared_info(d, evtchn_mask)) ?
                         'M' : '-'),
-                       (test_bit(pirq, d->pirq_mask) ?
-                        'M' : '-'));
+                       (info->masked ? 'M' : '-'));
                 if ( i != action->nr_guests )
                     printk(",");
             }
@@ -1869,6 +1952,7 @@
 int map_domain_emuirq_pirq(struct domain *d, int pirq, int emuirq)
 {
     int old_emuirq = IRQ_UNBOUND, old_pirq = IRQ_UNBOUND;
+    struct pirq *info;
 
     ASSERT(spin_is_locked(&d->event_lock));
 
@@ -1895,6 +1979,10 @@
         return 0;
     }
 
+    info = pirq_get_info(d, pirq);
+    if ( !info )
+        return -ENOMEM;
+
     /* do not store emuirq mappings for pt devices */
     if ( emuirq != IRQ_PT )
     {
@@ -1912,10 +2000,11 @@
                 radix_tree_int_to_ptr(pirq));
             break;
         default:
+            pirq_cleanup_check(info, d);
             return err;
         }
     }
-    d->arch.pirq_emuirq[pirq] = emuirq;
+    info->arch.hvm.emuirq = emuirq;
 
     return 0;
 }
@@ -1923,6 +2012,7 @@
 int unmap_domain_pirq_emuirq(struct domain *d, int pirq)
 {
     int emuirq, ret = 0;
+    struct pirq *info;
 
     if ( !is_hvm_domain(d) )
         return -EINVAL;
@@ -1941,7 +2031,12 @@
         goto done;
     }
 
-    d->arch.pirq_emuirq[pirq] = IRQ_UNBOUND;
+    info = pirq_info(d, pirq);
+    if ( info )
+    {
+        info->arch.hvm.emuirq = IRQ_UNBOUND;
+        pirq_cleanup_check(info, d);
+    }
     if ( emuirq != IRQ_PT )
         radix_tree_delete(&d->arch.hvm_domain.emuirq_pirq, emuirq);
 
@@ -1949,10 +2044,9 @@
     return ret;
 }
 
-int hvm_domain_use_pirq(struct domain *d, int pirq)
+bool_t hvm_domain_use_pirq(const struct domain *d, const struct pirq *pirq)
 {
-    if ( !is_hvm_domain(d) || pirq < 0 )
-        return 0;
-
-    return (domain_pirq_to_emuirq(d, pirq) != IRQ_UNBOUND);
+    return is_hvm_domain(d) && pirq &&
+           pirq->arch.hvm.emuirq != IRQ_UNBOUND &&
+           pirq->evtchn != 0;
 }
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/mm.c Thu Jun 23 18:34:55 2011 +0100
@@ -153,6 +153,8 @@
     (FRAMETABLE_SIZE / sizeof(*frame_table) + PDX_GROUP_COUNT - 1)
     / PDX_GROUP_COUNT)] = { [0] = 1 };
 
+bool_t __read_mostly machine_to_phys_mapping_valid = 0;
+
 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
 
 bool_t __read_mostly opt_allow_superpage;
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/physdev.c    Thu Jun 23 18:34:55 2011 +0100
@@ -252,20 +252,28 @@
     {
     case PHYSDEVOP_eoi: {
         struct physdev_eoi eoi;
+        struct pirq *pirq;
+
         ret = -EFAULT;
         if ( copy_from_guest(&eoi, arg, 1) != 0 )
             break;
         ret = -EINVAL;
         if ( eoi.irq >= v->domain->nr_pirqs )
             break;
+        spin_lock(&v->domain->event_lock);
+        pirq = pirq_info(v->domain, eoi.irq);
+        if ( !pirq ) {
+            spin_unlock(&v->domain->event_lock);
+            break;
+        }
         if ( !is_hvm_domain(v->domain) &&
              v->domain->arch.pv_domain.pirq_eoi_map )
-            evtchn_unmask(v->domain->pirq_to_evtchn[eoi.irq]);
+            evtchn_unmask(pirq->evtchn);
         if ( !is_hvm_domain(v->domain) ||
-             domain_pirq_to_emuirq(v->domain, eoi.irq) == IRQ_PT )
-            ret = pirq_guest_eoi(v->domain, eoi.irq);
-        else
-            ret = 0;
+             pirq->arch.hvm.emuirq == IRQ_PT )
+            pirq_guest_eoi(v->domain, pirq);
+        spin_unlock(&v->domain->event_lock);
+        ret = 0;
         break;
     }
 
@@ -558,11 +566,23 @@
             break;
 
         spin_lock(&d->event_lock);
-        out.pirq = get_free_pirq(d, out.type, 0);
-        d->arch.pirq_irq[out.pirq] = PIRQ_ALLOCATED;
+        ret = get_free_pirq(d, out.type, 0);
+        if ( ret >= 0 )
+        {
+            struct pirq *info = pirq_get_info(d, ret);
+
+            if ( info )
+                info->arch.irq = PIRQ_ALLOCATED;
+            else
+                ret = -ENOMEM;
+        }
         spin_unlock(&d->event_lock);
 
-        ret = copy_to_guest(arg, &out, 1) ? -EFAULT : 0;
+        if ( ret >= 0 )
+        {
+            out.pirq = ret;
+            ret = copy_to_guest(arg, &out, 1) ? -EFAULT : 0;
+        }
 
         rcu_unlock_domain(d);
         break;
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/platform_hypercall.c Thu Jun 23 18:34:55 2011 +0100
@@ -21,6 +21,7 @@
 #include <xen/acpi.h>
 #include <xen/cpu.h>
 #include <xen/pmstat.h>
+#include <xen/irq.h>
 #include <asm/current.h>
 #include <public/platform.h>
 #include <acpi/cpufreq/processor_perf.h>
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/traps.c      Thu Jun 23 18:34:55 2011 +0100
@@ -1422,6 +1422,7 @@
         unsigned long *stk = (unsigned long *)regs;
         printk("Early fatal page fault at %04x:%p (cr2=%p, ec=%04x)\n", 
                regs->cs, _p(regs->eip), _p(cr2), regs->error_code);
+        show_page_walk(cr2);
         printk("Stack dump: ");
         while ( ((long)stk & ((PAGE_SIZE - 1) & ~(BYTES_PER_LONG - 1))) != 0 )
             printk("%p ", _p(*stk++));
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/x86_32/mm.c  Thu Jun 23 18:34:55 2011 +0100
@@ -37,8 +37,6 @@
 unsigned int __read_mostly PAGE_HYPERVISOR         = __PAGE_HYPERVISOR;
 unsigned int __read_mostly PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
 
-bool_t __read_mostly machine_to_phys_mapping_valid = 0;
-
 static unsigned long __read_mostly mpt_size;
 
 void *alloc_xen_pagetable(void)
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/x86_32/traps.c       Thu Jun 23 18:34:55 2011 +0100
@@ -163,7 +163,8 @@
     l3t += (cr3 & 0xFE0UL) >> 3;
     l3e = l3t[l3_table_offset(addr)];
     mfn = l3e_get_pfn(l3e);
-    pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+    pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
     printk(" L3[0x%03lx] = %"PRIpte" %08lx\n",
            l3_table_offset(addr), l3e_get_intpte(l3e), pfn);
     unmap_domain_page(l3t);
@@ -174,7 +175,8 @@
     l2t = map_domain_page(mfn);
     l2e = l2t[l2_table_offset(addr)];
     mfn = l2e_get_pfn(l2e);
-    pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+    pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
     printk(" L2[0x%03lx] = %"PRIpte" %08lx %s\n",
            l2_table_offset(addr), l2e_get_intpte(l2e), pfn,
            (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
@@ -187,7 +189,8 @@
     l1t = map_domain_page(mfn);
     l1e = l1t[l1_table_offset(addr)];
     mfn = l1e_get_pfn(l1e);
-    pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+    pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
     printk(" L1[0x%03lx] = %"PRIpte" %08lx\n",
            l1_table_offset(addr), l1e_get_intpte(l1e), pfn);
     unmap_domain_page(l1t);
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/x86_64/mm.c  Thu Jun 23 18:34:55 2011 +0100
@@ -47,8 +47,6 @@
 
 unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
 
-bool_t __read_mostly machine_to_phys_mapping_valid = 0;
-
 /* Top-level master (and idle-domain) page directory. */
 l4_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
     idle_pg_table[L4_PAGETABLE_ENTRIES];
diff -r 3dcb553f3ba9 -r b24018319772 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/arch/x86/x86_64/traps.c       Thu Jun 23 18:34:55 2011 +0100
@@ -173,7 +173,8 @@
     l4t = mfn_to_virt(mfn);
     l4e = l4t[l4_table_offset(addr)];
     mfn = l4e_get_pfn(l4e);
-    pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+    pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
     printk(" L4[0x%03lx] = %"PRIpte" %016lx\n",
            l4_table_offset(addr), l4e_get_intpte(l4e), pfn);
     if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ||
@@ -183,7 +184,8 @@
     l3t = mfn_to_virt(mfn);
     l3e = l3t[l3_table_offset(addr)];
     mfn = l3e_get_pfn(l3e);
-    pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+    pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
     printk(" L3[0x%03lx] = %"PRIpte" %016lx%s\n",
            l3_table_offset(addr), l3e_get_intpte(l3e), pfn,
            (l3e_get_flags(l3e) & _PAGE_PSE) ? " (PSE)" : "");
@@ -195,7 +197,8 @@
     l2t = mfn_to_virt(mfn);
     l2e = l2t[l2_table_offset(addr)];
     mfn = l2e_get_pfn(l2e);
-    pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+    pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
     printk(" L2[0x%03lx] = %"PRIpte" %016lx %s\n",
            l2_table_offset(addr), l2e_get_intpte(l2e), pfn,
            (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
@@ -207,7 +210,8 @@
     l1t = mfn_to_virt(mfn);
     l1e = l1t[l1_table_offset(addr)];
     mfn = l1e_get_pfn(l1e);
-    pfn = mfn_valid(mfn) ? get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
+    pfn = mfn_valid(mfn) && machine_to_phys_mapping_valid ?
+          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
     printk(" L1[0x%03lx] = %"PRIpte" %016lx\n",
            l1_table_offset(addr), l1e_get_intpte(l1e), pfn);
 }
diff -r 3dcb553f3ba9 -r b24018319772 xen/common/domain.c
--- a/xen/common/domain.c       Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/common/domain.c       Thu Jun 23 18:34:55 2011 +0100
@@ -30,6 +30,7 @@
 #include <xen/wait.h>
 #include <xen/grant_table.h>
 #include <xen/xenoprof.h>
+#include <xen/irq.h>
 #include <acpi/cpufreq/cpufreq.h>
 #include <asm/debugger.h>
 #include <public/sched.h>
@@ -292,13 +293,7 @@
         if ( d->nr_pirqs > nr_irqs )
             d->nr_pirqs = nr_irqs;
 
-        d->pirq_to_evtchn = xmalloc_array(u16, d->nr_pirqs);
-        d->pirq_mask = xmalloc_array(
-            unsigned long, BITS_TO_LONGS(d->nr_pirqs));
-        if ( (d->pirq_to_evtchn == NULL) || (d->pirq_mask == NULL) )
-            goto fail;
-        memset(d->pirq_to_evtchn, 0, d->nr_pirqs * sizeof(*d->pirq_to_evtchn));
-        bitmap_zero(d->pirq_mask, d->nr_pirqs);
+        radix_tree_init(&d->pirq_tree);
 
         if ( evtchn_init(d) != 0 )
             goto fail;
@@ -348,6 +343,7 @@
     {
         evtchn_destroy(d);
         evtchn_destroy_final(d);
+        radix_tree_destroy(&d->pirq_tree, free_pirq_struct);
     }
     if ( init_status & INIT_rangeset )
         rangeset_domain_destroy(d);
@@ -355,8 +351,6 @@
         watchdog_domain_destroy(d);
     if ( init_status & INIT_xsm )
         xsm_free_security_domain(d);
-    xfree(d->pirq_mask);
-    xfree(d->pirq_to_evtchn);
     free_cpumask_var(d->domain_dirty_cpumask);
     free_domain_struct(d);
     return NULL;
@@ -682,8 +676,7 @@
 
     evtchn_destroy_final(d);
 
-    xfree(d->pirq_mask);
-    xfree(d->pirq_to_evtchn);
+    radix_tree_destroy(&d->pirq_tree, free_pirq_struct);
 
     xsm_free_security_domain(d);
     free_cpumask_var(d->domain_dirty_cpumask);
@@ -965,6 +958,35 @@
     return -ENOSYS;
 }
 
+struct pirq *pirq_get_info(struct domain *d, int pirq)
+{
+    struct pirq *info = pirq_info(d, pirq);
+
+    if ( !info && (info = alloc_pirq_struct(d)) != NULL )
+    {
+        info->pirq = pirq;
+        if ( radix_tree_insert(&d->pirq_tree, pirq, info) )
+        {
+            free_pirq_struct(info);
+            info = NULL;
+        }
+    }
+
+    return info;
+}
+
+static void _free_pirq_struct(struct rcu_head *head)
+{
+    xfree(container_of(head, struct pirq, rcu_head));
+}
+
+void free_pirq_struct(void *ptr)
+{
+    struct pirq *pirq = ptr;
+
+    call_rcu(&pirq->rcu_head, _free_pirq_struct);
+}
+
 struct migrate_info {
     long (*func)(void *data);
     void *data;
diff -r 3dcb553f3ba9 -r b24018319772 xen/common/event_channel.c
--- a/xen/common/event_channel.c        Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/common/event_channel.c        Thu Jun 23 18:34:55 2011 +0100
@@ -325,6 +325,7 @@
     struct evtchn *chn;
     struct domain *d = current->domain;
     struct vcpu   *v = d->vcpu[0];
+    struct pirq   *info;
     int            port, pirq = bind->pirq;
     long           rc;
 
@@ -336,7 +337,7 @@
 
     spin_lock(&d->event_lock);
 
-    if ( d->pirq_to_evtchn[pirq] != 0 )
+    if ( pirq_to_evtchn(d, pirq) != 0 )
         ERROR_EXIT(-EEXIST);
 
     if ( (port = get_free_port(d)) < 0 )
@@ -344,14 +345,18 @@
 
     chn = evtchn_from_port(d, port);
 
-    d->pirq_to_evtchn[pirq] = port;
+    info = pirq_get_info(d, pirq);
+    if ( !info )
+        ERROR_EXIT(-ENOMEM);
+    info->evtchn = port;
     rc = (!is_hvm_domain(d)
-          ? pirq_guest_bind(
-              v, pirq, !!(bind->flags & BIND_PIRQ__WILL_SHARE))
+          ? pirq_guest_bind(v, info,
+                            !!(bind->flags & BIND_PIRQ__WILL_SHARE))
           : 0);
     if ( rc != 0 )
     {
-        d->pirq_to_evtchn[pirq] = 0;
+        info->evtchn = 0;
+        pirq_cleanup_check(info, d);
         goto out;
     }
 
@@ -404,12 +409,18 @@
     case ECS_UNBOUND:
         break;
 
-    case ECS_PIRQ:
+    case ECS_PIRQ: {
+        struct pirq *pirq = pirq_info(d1, chn1->u.pirq.irq);
+
+        if ( !pirq )
+            break;
         if ( !is_hvm_domain(d1) )
-            pirq_guest_unbind(d1, chn1->u.pirq.irq);
-        d1->pirq_to_evtchn[chn1->u.pirq.irq] = 0;
+            pirq_guest_unbind(d1, pirq);
+        pirq->evtchn = 0;
+        pirq_cleanup_check(pirq, d1);
         unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
         break;
+    }
 
     case ECS_VIRQ:
         for_each_vcpu ( d1, v )
@@ -659,9 +670,9 @@
     spin_unlock_irqrestore(&v->virq_lock, flags);
 }
 
-int send_guest_pirq(struct domain *d, int pirq)
+int send_guest_pirq(struct domain *d, const struct pirq *pirq)
 {
-    int port = d->pirq_to_evtchn[pirq];
+    int port;
     struct evtchn *chn;
 
     /*
@@ -670,7 +681,7 @@
      * HVM guests: Port is legitimately zero when the guest disables the
      *     emulated interrupt/evtchn.
      */
-    if ( port == 0 )
+    if ( pirq == NULL || (port = pirq->evtchn) == 0 )
     {
         BUG_ON(!is_hvm_domain(d));
         return 0;
@@ -812,13 +823,10 @@
     struct domain *d = current->domain;
     struct vcpu   *v;
 
-    spin_lock(&d->event_lock);
+    ASSERT(spin_is_locked(&d->event_lock));
 
     if ( unlikely(!port_is_valid(d, port)) )
-    {
-        spin_unlock(&d->event_lock);
         return -EINVAL;
-    }
 
     v = d->vcpu[evtchn_from_port(d, port)->notify_vcpu_id];
 
@@ -834,8 +842,6 @@
         vcpu_mark_events_pending(v);
     }
 
-    spin_unlock(&d->event_lock);
-
     return 0;
 }
 
@@ -960,7 +966,9 @@
         struct evtchn_unmask unmask;
         if ( copy_from_guest(&unmask, arg, 1) != 0 )
             return -EFAULT;
+        spin_lock(&current->domain->event_lock);
         rc = evtchn_unmask(unmask.port);
+        spin_unlock(&current->domain->event_lock);
         break;
     }
 
diff -r 3dcb553f3ba9 -r b24018319772 xen/common/preempt.c
--- a/xen/common/preempt.c      Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/common/preempt.c      Thu Jun 23 18:34:55 2011 +0100
@@ -21,5 +21,12 @@
  */
 
 #include <xen/preempt.h>
+#include <xen/irq.h>
+#include <asm/system.h>
 
 DEFINE_PER_CPU(unsigned int, __preempt_count);
+
+bool_t in_atomic(void)
+{
+    return preempt_count() || in_irq() || !local_irq_is_enabled();
+}
diff -r 3dcb553f3ba9 -r b24018319772 xen/common/radix-tree.c
--- a/xen/common/radix-tree.c   Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/common/radix-tree.c   Thu Jun 23 18:34:55 2011 +0100
@@ -22,6 +22,7 @@
 #include <xen/config.h>
 #include <xen/init.h>
 #include <xen/radix-tree.h>
+#include <xen/errno.h>
 
 struct radix_tree_path {
        struct radix_tree_node *node;
diff -r 3dcb553f3ba9 -r b24018319772 xen/drivers/passthrough/amd/iommu_acpi.c
--- a/xen/drivers/passthrough/amd/iommu_acpi.c  Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c  Thu Jun 23 18:34:55 2011 +0100
@@ -817,7 +817,7 @@
 
     BUG_ON(!table);
 
-    if ( amd_iommu_debug )
+    if ( iommu_debug )
         dump_acpi_table_header(table);
 
     /* parse IVRS blocks */
diff -r 3dcb553f3ba9 -r b24018319772 xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c  Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/drivers/passthrough/amd/iommu_init.c  Thu Jun 23 18:34:55 2011 +0100
@@ -22,6 +22,7 @@
 #include <xen/errno.h>
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
+#include <xen/irq.h>
 #include <asm/amd-iommu.h>
 #include <asm/msi.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
diff -r 3dcb553f3ba9 -r b24018319772 xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c      Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/drivers/passthrough/io.c      Thu Jun 23 18:34:55 2011 +0100
@@ -20,6 +20,7 @@
 
 #include <xen/event.h>
 #include <xen/iommu.h>
+#include <xen/irq.h>
 #include <asm/hvm/irq.h>
 #include <asm/hvm/iommu.h>
 #include <asm/hvm/support.h>
@@ -35,18 +36,28 @@
     return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE));
 }
 
+static int pt_irq_guest_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
+                            void *arg)
+{
+    if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT,
+                              &pirq_dpci->flags) )
+    {
+        pirq_dpci->masked = 0;
+        pirq_dpci->pending = 0;
+        pirq_guest_eoi(d, dpci_pirq(pirq_dpci));
+    }
+
+    return 0;
+}
+
 static void pt_irq_time_out(void *data)
 {
-    struct hvm_mirq_dpci_mapping *irq_map = data;
-    unsigned int guest_gsi, machine_gsi = 0;
+    struct hvm_pirq_dpci *irq_map = data;
+    unsigned int guest_gsi;
     struct hvm_irq_dpci *dpci = NULL;
     struct dev_intx_gsi_link *digl;
     struct hvm_girq_dpci_mapping *girq;
     uint32_t device, intx;
-    unsigned int nr_pirqs = irq_map->dom->nr_pirqs;
-    DECLARE_BITMAP(machine_gsi_map, nr_pirqs);
-
-    bitmap_zero(machine_gsi_map, nr_pirqs);
 
     spin_lock(&irq_map->dom->event_lock);
 
@@ -57,32 +68,18 @@
         guest_gsi = digl->gsi;
         list_for_each_entry ( girq, &dpci->girq[guest_gsi], list )
         {
-            machine_gsi = girq->machine_gsi;
-            set_bit(machine_gsi, machine_gsi_map);
+            struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi);
+
+            pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH;
         }
         device = digl->device;
         intx = digl->intx;
         hvm_pci_intx_deassert(irq_map->dom, device, intx);
     }
 
-    for ( machine_gsi = find_first_bit(machine_gsi_map, nr_pirqs);
-          machine_gsi < nr_pirqs;
-          machine_gsi = find_next_bit(machine_gsi_map, nr_pirqs,
-                                      machine_gsi + 1) )
-    {
-        clear_bit(machine_gsi, dpci->dirq_mask);
-        dpci->mirq[machine_gsi].pending = 0;
-    }
+    pt_pirq_iterate(irq_map->dom, pt_irq_guest_eoi, NULL);
 
     spin_unlock(&irq_map->dom->event_lock);
-
-    for ( machine_gsi = find_first_bit(machine_gsi_map, nr_pirqs);
-          machine_gsi < nr_pirqs;
-          machine_gsi = find_next_bit(machine_gsi_map, nr_pirqs,
-                                      machine_gsi + 1) )
-    {
-        pirq_guest_eoi(irq_map->dom, machine_gsi);
-    }
 }
 
 struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *d)
@@ -95,10 +92,6 @@
 
 void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
 {
-    xfree(dpci->mirq);
-    xfree(dpci->dirq_mask);
-    xfree(dpci->mapping);
-    xfree(dpci->hvm_timer);
     xfree(dpci);
 }
 
@@ -106,7 +99,9 @@
     struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
 {
     struct hvm_irq_dpci *hvm_irq_dpci = NULL;
-    uint32_t machine_gsi, guest_gsi;
+    struct hvm_pirq_dpci *pirq_dpci;
+    struct pirq *info;
+    uint32_t guest_gsi;
     uint32_t device, intx, link;
     struct dev_intx_gsi_link *digl;
     struct hvm_girq_dpci_mapping *girq;
@@ -130,63 +125,45 @@
         softirq_tasklet_init(
             &hvm_irq_dpci->dirq_tasklet,
             hvm_dirq_assist, (unsigned long)d);
-        hvm_irq_dpci->mirq = xmalloc_array(struct hvm_mirq_dpci_mapping,
-                                           d->nr_pirqs);
-        hvm_irq_dpci->dirq_mask = xmalloc_array(unsigned long,
-                                                BITS_TO_LONGS(d->nr_pirqs));
-        hvm_irq_dpci->mapping = xmalloc_array(unsigned long,
-                                              BITS_TO_LONGS(d->nr_pirqs));
-        hvm_irq_dpci->hvm_timer = xmalloc_array(struct timer, d->nr_pirqs);
-        if ( !hvm_irq_dpci->mirq ||
-             !hvm_irq_dpci->dirq_mask ||
-             !hvm_irq_dpci->mapping ||
-             !hvm_irq_dpci->hvm_timer)
-        {
-            spin_unlock(&d->event_lock);
-            free_hvm_irq_dpci(hvm_irq_dpci);
-            return -ENOMEM;
-        }
-        memset(hvm_irq_dpci->mirq, 0,
-               d->nr_pirqs * sizeof(*hvm_irq_dpci->mirq));
-        bitmap_zero(hvm_irq_dpci->dirq_mask, d->nr_pirqs);
-        bitmap_zero(hvm_irq_dpci->mapping, d->nr_pirqs);
-        memset(hvm_irq_dpci->hvm_timer, 0,
-                d->nr_pirqs * sizeof(*hvm_irq_dpci->hvm_timer));
-        for ( int i = 0; i < d->nr_pirqs; i++ ) {
-            INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list);
-            hvm_irq_dpci->mirq[i].gmsi.dest_vcpu_id = -1;
-        }
         for ( int i = 0; i < NR_HVM_IRQS; i++ )
             INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]);
 
         d->arch.hvm_domain.irq.dpci = hvm_irq_dpci;
     }
 
+    info = pirq_get_info(d, pirq);
+    if ( !info )
+    {
+        spin_unlock(&d->event_lock);
+        return -ENOMEM;
+    }
+    pirq_dpci = pirq_dpci(info);
+
     if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )
     {
         uint8_t dest, dest_mode;
         int dest_vcpu_id;
 
-        if ( !test_and_set_bit(pirq, hvm_irq_dpci->mapping))
+        if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
         {
-            hvm_irq_dpci->mirq[pirq].flags = HVM_IRQ_DPCI_MACH_MSI |
-                                             HVM_IRQ_DPCI_GUEST_MSI;
-            hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec;
-            hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags;
+            pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI |
+                               HVM_IRQ_DPCI_GUEST_MSI;
+            pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
+            pirq_dpci->gmsi.gflags = pt_irq_bind->u.msi.gflags;
             /* bind after hvm_irq_dpci is setup to avoid race with irq 
handler*/
-            rc = pirq_guest_bind(d->vcpu[0], pirq, 0);
+            rc = pirq_guest_bind(d->vcpu[0], info, 0);
             if ( rc == 0 && pt_irq_bind->u.msi.gtable )
             {
-                rc = msixtbl_pt_register(d, pirq, pt_irq_bind->u.msi.gtable);
+                rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable);
                 if ( unlikely(rc) )
-                    pirq_guest_unbind(d, pirq);
+                    pirq_guest_unbind(d, info);
             }
             if ( unlikely(rc) )
             {
-                hvm_irq_dpci->mirq[pirq].gmsi.gflags = 0;
-                hvm_irq_dpci->mirq[pirq].gmsi.gvec = 0;
-                hvm_irq_dpci->mirq[pirq].flags = 0;
-                clear_bit(pirq, hvm_irq_dpci->mapping);
+                pirq_dpci->gmsi.gflags = 0;
+                pirq_dpci->gmsi.gvec = 0;
+                pirq_dpci->flags = 0;
+                pirq_cleanup_check(info, d);
                 spin_unlock(&d->event_lock);
                 return rc;
             }
@@ -195,34 +172,33 @@
         {
             uint32_t mask = HVM_IRQ_DPCI_MACH_MSI | HVM_IRQ_DPCI_GUEST_MSI;
 
-            if ( (hvm_irq_dpci->mirq[pirq].flags & mask) != mask)
+            if ( (pirq_dpci->flags & mask) != mask)
             {
                    spin_unlock(&d->event_lock);
                    return -EBUSY;
             }
 
             /* if pirq is already mapped as vmsi, update the guest data/addr */
-            if ( hvm_irq_dpci->mirq[pirq].gmsi.gvec != pt_irq_bind->u.msi.gvec 
||
-                    hvm_irq_dpci->mirq[pirq].gmsi.gflags != 
pt_irq_bind->u.msi.gflags) {
+            if ( pirq_dpci->gmsi.gvec != pt_irq_bind->u.msi.gvec ||
+                 pirq_dpci->gmsi.gflags != pt_irq_bind->u.msi.gflags) {
                 /* Directly clear pending EOIs before enabling new MSI info. */
-                pirq_guest_eoi(d, pirq);
+                pirq_guest_eoi(d, info);
 
-                hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec;
-                hvm_irq_dpci->mirq[pirq].gmsi.gflags = 
pt_irq_bind->u.msi.gflags;
+                pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
+                pirq_dpci->gmsi.gflags = pt_irq_bind->u.msi.gflags;
             }
         }
         /* Caculate dest_vcpu_id for MSI-type pirq migration */
-        dest = hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DEST_ID_MASK;
-        dest_mode = !!(hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DM_MASK);
+        dest = pirq_dpci->gmsi.gflags & VMSI_DEST_ID_MASK;
+        dest_mode = !!(pirq_dpci->gmsi.gflags & VMSI_DM_MASK);
         dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
-        hvm_irq_dpci->mirq[pirq].gmsi.dest_vcpu_id = dest_vcpu_id;
+        pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
         spin_unlock(&d->event_lock);
         if ( dest_vcpu_id >= 0 )
             hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);
     }
     else
     {
-        machine_gsi = pt_irq_bind->machine_irq;
         device = pt_irq_bind->u.pci.device;
         intx = pt_irq_bind->u.pci.intx;
         guest_gsi = hvm_pci_intx_gsi(device, intx);
@@ -248,50 +224,51 @@
         digl->intx = intx;
         digl->gsi = guest_gsi;
         digl->link = link;
-        list_add_tail(&digl->list,
-                      &hvm_irq_dpci->mirq[machine_gsi].digl_list);
+        list_add_tail(&digl->list, &pirq_dpci->digl_list);
 
         girq->device = device;
         girq->intx = intx;
-        girq->machine_gsi = machine_gsi;
+        girq->machine_gsi = pirq;
         list_add_tail(&girq->list, &hvm_irq_dpci->girq[guest_gsi]);
 
         /* Bind the same mirq once in the same domain */
-        if ( !test_and_set_bit(machine_gsi, hvm_irq_dpci->mapping))
+        if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
         {
             unsigned int share;
 
-            hvm_irq_dpci->mirq[machine_gsi].dom = d;
+            pirq_dpci->dom = d;
             if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_TRANSLATE )
             {
-                hvm_irq_dpci->mirq[machine_gsi].flags = HVM_IRQ_DPCI_MACH_MSI |
-                                                        HVM_IRQ_DPCI_GUEST_PCI 
|
-                                                        HVM_IRQ_DPCI_TRANSLATE;
+                pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
+                                   HVM_IRQ_DPCI_MACH_MSI |
+                                   HVM_IRQ_DPCI_GUEST_PCI |
+                                   HVM_IRQ_DPCI_TRANSLATE;
                 share = 0;
             }
             else    /* PT_IRQ_TYPE_PCI */
             {
-                hvm_irq_dpci->mirq[machine_gsi].flags = HVM_IRQ_DPCI_MACH_PCI |
-                                                        HVM_IRQ_DPCI_GUEST_PCI;
+                pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
+                                   HVM_IRQ_DPCI_MACH_PCI |
+                                   HVM_IRQ_DPCI_GUEST_PCI;
                 share = BIND_PIRQ__WILL_SHARE;
             }
 
             /* Init timer before binding */
-            if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
-                init_timer(&hvm_irq_dpci->hvm_timer[machine_gsi],
-                           pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 
0);
+            if ( pt_irq_need_timer(pirq_dpci->flags) )
+                init_timer(&pirq_dpci->timer, pt_irq_time_out, pirq_dpci, 0);
             /* Deal with gsi for legacy devices */
-            rc = pirq_guest_bind(d->vcpu[0], machine_gsi, share);
+            rc = pirq_guest_bind(d->vcpu[0], info, share);
             if ( unlikely(rc) )
             {
-                if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
-                    kill_timer(&hvm_irq_dpci->hvm_timer[machine_gsi]);
-                hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
-                clear_bit(machine_gsi, hvm_irq_dpci->mapping);
+                if ( pt_irq_need_timer(pirq_dpci->flags) )
+                    kill_timer(&pirq_dpci->timer);
+                pirq_dpci->dom = NULL;
                 list_del(&girq->list);
                 xfree(girq);
                 list_del(&digl->list);
                 hvm_irq_dpci->link_cnt[link]--;
+                pirq_dpci->flags = 0;
+                pirq_cleanup_check(info, d);
                 spin_unlock(&d->event_lock);
                 xfree(digl);
                 return rc;
@@ -303,7 +280,7 @@
         if ( iommu_verbose )
             dprintk(VTDPREFIX,
                     "d%d: bind: m_gsi=%u g_gsi=%u device=%u intx=%u\n",
-                    d->domain_id, machine_gsi, guest_gsi, device, intx);
+                    d->domain_id, pirq, guest_gsi, device, intx);
     }
     return 0;
 }
@@ -312,11 +289,12 @@
     struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
 {
     struct hvm_irq_dpci *hvm_irq_dpci = NULL;
+    struct hvm_pirq_dpci *pirq_dpci;
     uint32_t machine_gsi, guest_gsi;
     uint32_t device, intx, link;
-    struct list_head *digl_list, *tmp;
-    struct dev_intx_gsi_link *digl;
+    struct dev_intx_gsi_link *digl, *tmp;
     struct hvm_girq_dpci_mapping *girq;
+    struct pirq *pirq;
 
     machine_gsi = pt_irq_bind->machine_irq;
     device = pt_irq_bind->u.pci.device;
@@ -351,14 +329,14 @@
         }
     }
 
+    pirq = pirq_info(d, machine_gsi);
+    pirq_dpci = pirq_dpci(pirq);
+
     /* clear the mirq info */
-    if ( test_bit(machine_gsi, hvm_irq_dpci->mapping))
+    if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
     {
-        list_for_each_safe ( digl_list, tmp,
-                &hvm_irq_dpci->mirq[machine_gsi].digl_list )
+        list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
         {
-            digl = list_entry(digl_list,
-                    struct dev_intx_gsi_link, list);
             if ( digl->device == device &&
                  digl->intx   == intx &&
                  digl->link   == link &&
@@ -369,15 +347,15 @@
             }
         }
 
-        if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) )
+        if ( list_empty(&pirq_dpci->digl_list) )
         {
-            pirq_guest_unbind(d, machine_gsi);
-            msixtbl_pt_unregister(d, machine_gsi);
-            if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
-                kill_timer(&hvm_irq_dpci->hvm_timer[machine_gsi]);
-            hvm_irq_dpci->mirq[machine_gsi].dom   = NULL;
-            hvm_irq_dpci->mirq[machine_gsi].flags = 0;
-            clear_bit(machine_gsi, hvm_irq_dpci->mapping);
+            pirq_guest_unbind(d, pirq);
+            msixtbl_pt_unregister(d, pirq);
+            if ( pt_irq_need_timer(pirq_dpci->flags) )
+                kill_timer(&pirq_dpci->timer);
+            pirq_dpci->dom   = NULL;
+            pirq_dpci->flags = 0;
+            pirq_cleanup_check(pirq, d);
         }
     }
     spin_unlock(&d->event_lock);
@@ -390,118 +368,154 @@
     return 0;
 }
 
-int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
+void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci *dpci)
+{
+    INIT_LIST_HEAD(&dpci->digl_list);
+    dpci->gmsi.dest_vcpu_id = -1;
+}
+
+bool_t pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci)
+{
+    return !dpci->flags;
+}
+
+int pt_pirq_iterate(struct domain *d,
+                    int (*cb)(struct domain *,
+                              struct hvm_pirq_dpci *, void *),
+                    void *arg)
+{
+    int rc = 0;
+    unsigned int pirq = 0, n, i;
+    struct pirq *pirqs[8];
+
+    ASSERT(spin_is_locked(&d->event_lock));
+
+    do {
+        n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
+                                   ARRAY_SIZE(pirqs));
+        for ( i = 0; i < n; ++i )
+        {
+            struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirqs[i]);
+
+            pirq = pirqs[i]->pirq;
+            if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
+                rc = cb(d, pirq_dpci, arg);
+        }
+    } while ( !rc && ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
+
+    return rc;
+}
+
+int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
 {
     struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d);
+    struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirq);
 
-    ASSERT(spin_is_locked(&irq_desc[domain_pirq_to_irq(d, mirq)].lock));
-    if ( !iommu_enabled || !dpci || !test_bit(mirq, dpci->mapping))
+    if ( !iommu_enabled || !dpci || !pirq_dpci ||
+         !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
         return 0;
 
-    set_bit(mirq, dpci->dirq_mask);
+    pirq_dpci->masked = 1;
     tasklet_schedule(&dpci->dirq_tasklet);
     return 1;
 }
 
 #ifdef SUPPORT_MSI_REMAPPING
 /* called with d->event_lock held */
-static void __msi_pirq_eoi(struct domain *d, int pirq)
+static void __msi_pirq_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
 {
-    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
     irq_desc_t *desc;
 
-    if ( ( pirq >= 0 ) && ( pirq < d->nr_pirqs ) &&
-         test_bit(pirq, hvm_irq_dpci->mapping) &&
-         ( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI) )
+    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
+         (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) )
     {
+        struct pirq *pirq = dpci_pirq(pirq_dpci);
+
          BUG_ON(!local_irq_is_enabled());
-         desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+         desc = pirq_spin_lock_irq_desc(d, pirq, NULL);
          if ( !desc )
             return;
 
          desc->status &= ~IRQ_INPROGRESS;
-         spin_unlock_irq(&desc->lock);
+         desc_guest_eoi(d, desc, pirq);
+    }
+}
 
-         pirq_guest_eoi(d, pirq);
+static int _hvm_dpci_msi_eoi(struct domain *d,
+                             struct hvm_pirq_dpci *pirq_dpci, void *arg)
+{
+    int vector = (long)arg;
+
+    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) &&
+         (pirq_dpci->gmsi.gvec == vector) )
+    {
+        int dest = pirq_dpci->gmsi.gflags & VMSI_DEST_ID_MASK;
+        int dest_mode = !!(pirq_dpci->gmsi.gflags & VMSI_DM_MASK);
+
+        if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest,
+                               dest_mode) )
+        {
+            __msi_pirq_eoi(d, pirq_dpci);
+            return 1;
+        }
     }
+
+    return 0;
 }
 
 void hvm_dpci_msi_eoi(struct domain *d, int vector)
 {
-    int pirq, dest, dest_mode;
-    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
-
-    if ( !iommu_enabled || (hvm_irq_dpci == NULL) )
+    if ( !iommu_enabled || !d->arch.hvm_domain.irq.dpci )
        return;
 
     spin_lock(&d->event_lock);
-    for ( pirq = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs);
-          pirq < d->nr_pirqs;
-          pirq = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, pirq + 1) )
-    {
-        if ( (!(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI)) ||
-                (hvm_irq_dpci->mirq[pirq].gmsi.gvec != vector) )
-            continue;
-
-        dest = hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DEST_ID_MASK;
-        dest_mode = !!(hvm_irq_dpci->mirq[pirq].gmsi.gflags & VMSI_DM_MASK);
-        if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest, dest_mode) 
)
-            break;
-    }
-    if ( pirq < d->nr_pirqs )
-        __msi_pirq_eoi(d, pirq);
+    pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
     spin_unlock(&d->event_lock);
 }
 
-static int hvm_pci_msi_assert(struct domain *d, int pirq)
+static int hvm_pci_msi_assert(struct domain *d,
+                              struct hvm_pirq_dpci *pirq_dpci)
 {
+    struct pirq *pirq = dpci_pirq(pirq_dpci);
+
     return (hvm_domain_use_pirq(d, pirq)
             ? send_guest_pirq(d, pirq)
-            : vmsi_deliver_pirq(d, pirq));
+            : vmsi_deliver_pirq(d, pirq_dpci));
 }
 #endif
 
-static void hvm_dirq_assist(unsigned long _d)
+static int _hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
+                            void *arg)
 {
-    unsigned int pirq;
     uint32_t device, intx;
-    struct domain *d = (struct domain *)_d;
-    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
     struct dev_intx_gsi_link *digl;
 
-    ASSERT(hvm_irq_dpci);
-
-    for ( pirq = find_first_bit(hvm_irq_dpci->dirq_mask, d->nr_pirqs);
-          pirq < d->nr_pirqs;
-          pirq = find_next_bit(hvm_irq_dpci->dirq_mask, d->nr_pirqs, pirq + 1) 
)
+    if ( test_and_clear_bool(pirq_dpci->masked) )
     {
-        if ( !test_and_clear_bit(pirq, hvm_irq_dpci->dirq_mask) )
-            continue;
-
-        spin_lock(&d->event_lock);
 #ifdef SUPPORT_MSI_REMAPPING
-        if ( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_GUEST_MSI )
+        if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
         {
-            hvm_pci_msi_assert(d, pirq);
-            spin_unlock(&d->event_lock);
-            continue;
+            hvm_pci_msi_assert(d, pirq_dpci);
+            return 0;
         }
 #endif
-        list_for_each_entry ( digl, &hvm_irq_dpci->mirq[pirq].digl_list, list )
+        list_for_each_entry ( digl, &pirq_dpci->digl_list, list )
         {
+            struct pirq *info = dpci_pirq(pirq_dpci);
+
             device = digl->device;
             intx = digl->intx;
-            if ( hvm_domain_use_pirq(d, pirq) )
-                send_guest_pirq(d, pirq);
+            if ( hvm_domain_use_pirq(d, info) )
+                send_guest_pirq(d, info);
             else
                 hvm_pci_intx_assert(d, device, intx);
-            hvm_irq_dpci->mirq[pirq].pending++;
+            pirq_dpci->pending++;
 
 #ifdef SUPPORT_MSI_REMAPPING
-            if ( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_TRANSLATE )
+            if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE )
             {
                 /* for translated MSI to INTx interrupt, eoi as early as 
possible */
-                __msi_pirq_eoi(d, pirq);
+                __msi_pirq_eoi(d, pirq_dpci);
             }
 #endif
         }
@@ -513,37 +527,50 @@
          * guest will never deal with the irq, then the physical interrupt line
          * will never be deasserted.
          */
-        if ( pt_irq_need_timer(hvm_irq_dpci->mirq[pirq].flags) )
-            set_timer(&hvm_irq_dpci->hvm_timer[pirq],
-                      NOW() + PT_IRQ_TIME_OUT);
-        spin_unlock(&d->event_lock);
+        if ( pt_irq_need_timer(pirq_dpci->flags) )
+            set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT);
     }
+
+    return 0;
+}
+
+static void hvm_dirq_assist(unsigned long _d)
+{
+    struct domain *d = (struct domain *)_d;
+
+    ASSERT(d->arch.hvm_domain.irq.dpci);
+
+    spin_lock(&d->event_lock);
+    pt_pirq_iterate(d, _hvm_dirq_assist, NULL);
+    spin_unlock(&d->event_lock);
 }
 
 static void __hvm_dpci_eoi(struct domain *d,
-                           struct hvm_irq_dpci *hvm_irq_dpci,
                            struct hvm_girq_dpci_mapping *girq,
                            union vioapic_redir_entry *ent)
 {
-    uint32_t device, intx, machine_gsi;
+    uint32_t device, intx;
+    struct pirq *pirq;
+    struct hvm_pirq_dpci *pirq_dpci;
 
     device = girq->device;
     intx = girq->intx;
     hvm_pci_intx_deassert(d, device, intx);
 
-    machine_gsi = girq->machine_gsi;
+    pirq = pirq_info(d, girq->machine_gsi);
+    pirq_dpci = pirq_dpci(pirq);
 
     /*
      * No need to get vector lock for timer
      * since interrupt is still not EOIed
      */
-    if ( --hvm_irq_dpci->mirq[machine_gsi].pending ||
+    if ( --pirq_dpci->pending ||
          ( ent && ent->fields.mask ) ||
-         ! pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
+         ! pt_irq_need_timer(pirq_dpci->flags) )
         return;
 
-    stop_timer(&hvm_irq_dpci->hvm_timer[machine_gsi]);
-    pirq_guest_eoi(d, machine_gsi);
+    stop_timer(&pirq_dpci->timer);
+    pirq_guest_eoi(d, pirq);
 }
 
 void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
@@ -568,7 +595,7 @@
         goto unlock;
 
     list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list )
-        __hvm_dpci_eoi(d, hvm_irq_dpci, girq, ent);
+        __hvm_dpci_eoi(d, girq, ent);
 
 unlock:
     spin_unlock(&d->event_lock);
diff -r 3dcb553f3ba9 -r b24018319772 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/drivers/passthrough/iommu.c   Thu Jun 23 18:34:55 2011 +0100
@@ -48,7 +48,7 @@
 bool_t __read_mostly iommu_qinval = 1;
 bool_t __read_mostly iommu_intremap = 1;
 bool_t __read_mostly iommu_hap_pt_share;
-bool_t __read_mostly amd_iommu_debug;
+bool_t __read_mostly iommu_debug;
 bool_t __read_mostly amd_iommu_perdev_intremap;
 
 static void __init parse_iommu_param(char *s)
@@ -74,8 +74,8 @@
             iommu_qinval = 0;
         else if ( !strcmp(s, "no-intremap") )
             iommu_intremap = 0;
-        else if ( !strcmp(s, "amd-iommu-debug") )
-            amd_iommu_debug = 1;
+        else if ( !strcmp(s, "debug") )
+            iommu_debug = 1;
         else if ( !strcmp(s, "amd-iommu-perdev-intremap") )
             amd_iommu_perdev_intremap = 1;
         else if ( !strcmp(s, "dom0-passthrough") )
diff -r 3dcb553f3ba9 -r b24018319772 xen/drivers/passthrough/pci.c
--- a/xen/drivers/passthrough/pci.c     Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/drivers/passthrough/pci.c     Thu Jun 23 18:34:55 2011 +0100
@@ -21,6 +21,7 @@
 #include <xen/list.h>
 #include <xen/prefetch.h>
 #include <xen/iommu.h>
+#include <xen/irq.h>
 #include <asm/hvm/iommu.h>
 #include <asm/hvm/irq.h>
 #include <xen/delay.h>
@@ -242,12 +243,28 @@
     return ret;
 }
 
+static int pci_clean_dpci_irq(struct domain *d,
+                              struct hvm_pirq_dpci *pirq_dpci, void *arg)
+{
+    struct dev_intx_gsi_link *digl, *tmp;
+
+    pirq_guest_unbind(d, dpci_pirq(pirq_dpci));
+
+    if ( pt_irq_need_timer(pirq_dpci->flags) )
+        kill_timer(&pirq_dpci->timer);
+
+    list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
+    {
+        list_del(&digl->list);
+        xfree(digl);
+    }
+
+    return 0;
+}
+
 static void pci_clean_dpci_irqs(struct domain *d)
 {
     struct hvm_irq_dpci *hvm_irq_dpci = NULL;
-    uint32_t i;
-    struct list_head *digl_list, *tmp;
-    struct dev_intx_gsi_link *digl;
 
     if ( !iommu_enabled )
         return;
@@ -261,24 +278,7 @@
     {
         tasklet_kill(&hvm_irq_dpci->dirq_tasklet);
 
-        for ( i = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs);
-              i < d->nr_pirqs;
-              i = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, i + 1) )
-        {
-            pirq_guest_unbind(d, i);
-
-            if ( pt_irq_need_timer(hvm_irq_dpci->mirq[i].flags) )
-                kill_timer(&hvm_irq_dpci->hvm_timer[i]);
-
-            list_for_each_safe ( digl_list, tmp,
-                                 &hvm_irq_dpci->mirq[i].digl_list )
-            {
-                digl = list_entry(digl_list,
-                                  struct dev_intx_gsi_link, list);
-                list_del(&digl->list);
-                xfree(digl);
-            }
-        }
+        pt_pirq_iterate(d, pci_clean_dpci_irq, NULL);
 
         d->arch.hvm_domain.irq.dpci = NULL;
         free_hvm_irq_dpci(hvm_irq_dpci);
diff -r 3dcb553f3ba9 -r b24018319772 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/drivers/passthrough/vtd/iommu.c       Thu Jun 23 18:34:55 2011 +0100
@@ -818,7 +818,7 @@
 
     if ( fault_type == DMA_REMAP )
     {
-        dprintk(XENLOG_WARNING VTDPREFIX,
+        INTEL_IOMMU_DEBUG(
                 "DMAR:[%s] Request device [%02x:%02x.%d] "
                 "fault addr %"PRIx64", iommu reg = %p\n"
                 "DMAR:[fault reason %02xh] %s\n",
@@ -827,12 +827,13 @@
                 PCI_FUNC(source_id & 0xFF), addr, iommu->reg,
                 fault_reason, reason);
 #ifndef __i386__ /* map_domain_page() cannot be used in this context */
-        print_vtd_entries(iommu, (source_id >> 8),
+       if (iommu_debug)
+            print_vtd_entries(iommu, (source_id >> 8),
                           (source_id & 0xff), (addr >> PAGE_SHIFT));
 #endif
     }
     else
-        dprintk(XENLOG_WARNING VTDPREFIX,
+        INTEL_IOMMU_DEBUG(
                 "INTR-REMAP: Request device [%02x:%02x.%d] "
                 "fault index %"PRIx64", iommu reg = %p\n"
                 "INTR-REMAP:[fault reason %02xh] %s\n",
@@ -846,26 +847,19 @@
 static void iommu_fault_status(u32 fault_status)
 {
     if ( fault_status & DMA_FSTS_PFO )
-        dprintk(XENLOG_ERR VTDPREFIX,
-            "iommu_fault_status: Fault Overflow\n");
+        INTEL_IOMMU_DEBUG("iommu_fault_status: Fault Overflow\n");
     if ( fault_status & DMA_FSTS_PPF )
-        dprintk(XENLOG_ERR VTDPREFIX,
-            "iommu_fault_status: Primary Pending Fault\n");
+        INTEL_IOMMU_DEBUG("iommu_fault_status: Primary Pending Fault\n");
     if ( fault_status & DMA_FSTS_AFO )
-        dprintk(XENLOG_ERR VTDPREFIX,
-            "iommu_fault_status: Advanced Fault Overflow\n");
+        INTEL_IOMMU_DEBUG("iommu_fault_status: Advanced Fault Overflow\n");
     if ( fault_status & DMA_FSTS_APF )
-        dprintk(XENLOG_ERR VTDPREFIX,
-            "iommu_fault_status: Advanced Pending Fault\n");
+        INTEL_IOMMU_DEBUG("iommu_fault_status: Advanced Pending Fault\n");
     if ( fault_status & DMA_FSTS_IQE )
-        dprintk(XENLOG_ERR VTDPREFIX,
-            "iommu_fault_status: Invalidation Queue Error\n");
+        INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Queue Error\n");
     if ( fault_status & DMA_FSTS_ICE )
-        dprintk(XENLOG_ERR VTDPREFIX,
-            "iommu_fault_status: Invalidation Completion Error\n");
+        INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Completion 
Error\n");
     if ( fault_status & DMA_FSTS_ITE )
-        dprintk(XENLOG_ERR VTDPREFIX,
-            "iommu_fault_status: Invalidation Time-out Error\n");
+        INTEL_IOMMU_DEBUG("iommu_fault_status: Invalidation Time-out Error\n");
 }
 
 #define PRIMARY_FAULT_REG_LEN (16)
diff -r 3dcb553f3ba9 -r b24018319772 xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h       Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/drivers/passthrough/vtd/iommu.h       Thu Jun 23 18:34:55 2011 +0100
@@ -512,4 +512,11 @@
     struct acpi_drhd_unit *drhd;
 };
 
+#define INTEL_IOMMU_DEBUG(fmt, args...) \
+    do  \
+    {   \
+        if ( iommu_debug )  \
+            dprintk(XENLOG_WARNING VTDPREFIX, fmt, ## args);    \
+    } while(0)
+
 #endif
diff -r 3dcb553f3ba9 -r b24018319772 xen/drivers/passthrough/vtd/x86/vtd.c
--- a/xen/drivers/passthrough/vtd/x86/vtd.c     Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c     Thu Jun 23 18:34:55 2011 +0100
@@ -23,6 +23,7 @@
 #include <xen/domain_page.h>
 #include <asm/paging.h>
 #include <xen/iommu.h>
+#include <xen/irq.h>
 #include <xen/numa.h>
 #include <asm/fixmap.h>
 #include <asm/setup.h>
@@ -69,12 +70,32 @@
     return (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
 }
 
+static int _hvm_dpci_isairq_eoi(struct domain *d,
+                                struct hvm_pirq_dpci *pirq_dpci, void *arg)
+{
+    struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+    unsigned int isairq = (long)arg;
+    struct dev_intx_gsi_link *digl, *tmp;
+
+    list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
+    {
+        if ( hvm_irq->pci_link.route[digl->link] == isairq )
+        {
+            hvm_pci_intx_deassert(d, digl->device, digl->intx);
+            if ( --pirq_dpci->pending == 0 )
+            {
+                stop_timer(&pirq_dpci->timer);
+                pirq_guest_eoi(d, dpci_pirq(pirq_dpci));
+            }
+        }
+    }
+
+    return 0;
+}
+
 void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq)
 {
-    struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
     struct hvm_irq_dpci *dpci = NULL;
-    struct dev_intx_gsi_link *digl, *tmp;
-    int i;
 
     ASSERT(isairq < NR_ISAIRQS);
     if ( !iommu_enabled)
@@ -84,29 +105,10 @@
 
     dpci = domain_get_irq_dpci(d);
 
-    if ( !dpci || !test_bit(isairq, dpci->isairq_map) )
+    if ( dpci && test_bit(isairq, dpci->isairq_map) )
     {
-        spin_unlock(&d->event_lock);
-        return;
-    }
-    /* Multiple mirq may be mapped to one isa irq */
-    for ( i = find_first_bit(dpci->mapping, d->nr_pirqs);
-          i < d->nr_pirqs;
-          i = find_next_bit(dpci->mapping, d->nr_pirqs, i + 1) )
-    {
-        list_for_each_entry_safe ( digl, tmp,
-            &dpci->mirq[i].digl_list, list )
-        {
-            if ( hvm_irq->pci_link.route[digl->link] == isairq )
-            {
-                hvm_pci_intx_deassert(d, digl->device, digl->intx);
-                if ( --dpci->mirq[i].pending == 0 )
-                {
-                    stop_timer(&dpci->hvm_timer[i]);
-                    pirq_guest_eoi(d, i);
-                }
-            }
-        }
+        /* Multiple mirq may be mapped to one isa irq */
+        pt_pirq_iterate(d, _hvm_dpci_isairq_eoi, (void *)(long)isairq);
     }
     spin_unlock(&d->event_lock);
 }
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h     Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/asm-ia64/domain.h     Thu Jun 23 18:34:55 2011 +0100
@@ -11,6 +11,7 @@
 #include <xen/list.h>
 #include <xen/cpumask.h>
 #include <xen/mm.h>
+#include <xen/hvm/irq.h>
 #include <asm/fpswa.h>
 #include <xen/rangeset.h>
 
@@ -316,6 +317,23 @@
     cpumask_t cache_coherent_map;
 };
 
+struct arch_pirq {
+    struct hvm_pirq_dpci dpci;
+};
+
+#define pirq_dpci(pirq) ((pirq) ? &(pirq)->arch.dpci : NULL)
+#define dpci_pirq(dpci) container_of(dpci, struct pirq, arch.dpci)
+
+#define alloc_pirq_struct(d) ({ \
+    struct pirq *pirq = xmalloc(struct pirq); \
+    if ( pirq ) \
+    { \
+        memset(pirq, 0, sizeof(*pirq)); \
+        pt_pirq_init(d, &pirq->arch.dpci); \
+    } \
+    pirq; \
+})
+
 #include <asm/uaccess.h> /* for KERNEL_DS */
 #include <asm/pgtable.h>
 
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/asm-x86/bzimage.h
--- a/xen/include/asm-x86/bzimage.h     Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/asm-x86/bzimage.h     Thu Jun 23 18:34:55 2011 +0100
@@ -4,7 +4,7 @@
 #include <xen/config.h>
 #include <xen/init.h>
 
-int bzimage_headroom(char *image_start, unsigned long image_length);
+unsigned long bzimage_headroom(char *image_start, unsigned long image_length);
 
 int bzimage_parse(char *image_base, char **image_start,
                   unsigned long *image_len);
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/asm-x86/domain.h      Thu Jun 23 18:34:55 2011 +0100
@@ -275,9 +275,6 @@
 
     /* NB. protected by d->event_lock and by irq_desc[irq].lock */
     struct radix_tree_root irq_pirq;
-    int *pirq_irq;
-    /* pirq to emulated irq */
-    int *pirq_emuirq;
 
     /* Maximum physical-address bitwidth supported by this guest. */
     unsigned int physaddr_bitsize;
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h     Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/asm-x86/hvm/hvm.h     Thu Jun 23 18:34:55 2011 +0100
@@ -204,7 +204,8 @@
     struct domain *d, int vector,
     uint8_t dest, uint8_t dest_mode,
     uint8_t delivery_mode, uint8_t trig_mode);
-int vmsi_deliver_pirq(struct domain *d, int pirq);
+struct hvm_pirq_dpci;
+int vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *);
 int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode);
 
 #define hvm_paging_enabled(v) \
diff -r 3dcb553f3ba9 -r b24018319772 
xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h     Fri Jun 10 10:47:29 
2011 +0200
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h     Thu Jun 23 18:34:55 
2011 +0100
@@ -35,7 +35,7 @@
 #define AMD_IOMMU_DEBUG(fmt, args...) \
     do  \
     {   \
-        if ( amd_iommu_debug )  \
+        if ( iommu_debug )  \
             printk(XENLOG_INFO "AMD-Vi: " fmt, ## args);    \
     } while(0)
 
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/asm-x86/irq.h Thu Jun 23 18:34:55 2011 +0100
@@ -7,6 +7,7 @@
 #include <asm/atomic.h>
 #include <xen/cpumask.h>
 #include <xen/smp.h>
+#include <xen/hvm/irq.h>
 #include <irq_vectors.h>
 #include <asm/percpu.h>
 
@@ -105,6 +106,20 @@
 
 DECLARE_PER_CPU(unsigned int, irq_count);
 
+struct pirq;
+struct arch_pirq {
+    int irq;
+    union {
+        struct hvm_pirq {
+            int emuirq;
+            struct hvm_pirq_dpci dpci;
+        } hvm;
+    };
+};
+
+#define pirq_dpci(pirq) ((pirq) ? &(pirq)->arch.hvm.dpci : NULL)
+#define dpci_pirq(pd) container_of(pd, struct pirq, arch.hvm.dpci)
+
 int pirq_shared(struct domain *d , int irq);
 
 int map_domain_pirq(struct domain *d, int pirq, int irq, int type,
@@ -114,7 +129,7 @@
 void free_domain_pirqs(struct domain *d);
 int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
 int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
-int hvm_domain_use_pirq(struct domain *d, int irq);
+bool_t hvm_domain_use_pirq(const struct domain *, const struct pirq *);
 
 /* A cpu has been removed from cpu_online_mask.  Re-set irq affinities. */
 void fixup_irqs(void);
@@ -149,13 +164,13 @@
 int init_domain_irq_mapping(struct domain *);
 void cleanup_domain_irq_mapping(struct domain *);
 
-#define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
+#define domain_pirq_to_irq(d, pirq) pirq_field(d, pirq, arch.irq)
 #define domain_irq_to_pirq(d, irq) ({                           \
     void *__ret = radix_tree_lookup(&(d)->arch.irq_pirq, irq);  \
     __ret ? radix_tree_ptr_to_int(__ret) : 0;                   \
 })
 #define PIRQ_ALLOCATED -1
-#define domain_pirq_to_emuirq(d, pirq) ((d)->arch.pirq_emuirq[pirq])
+#define domain_pirq_to_emuirq(d, pirq) pirq_field(d, pirq, arch.hvm.emuirq)
 #define domain_emuirq_to_pirq(d, emuirq) ({                             \
     void *__ret = radix_tree_lookup(&(d)->arch.hvm_domain.emuirq_pirq,  \
                                     emuirq);                            \
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/xen/domain.h
--- a/xen/include/xen/domain.h  Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/xen/domain.h  Thu Jun 23 18:34:55 2011 +0100
@@ -38,6 +38,12 @@
 void free_vcpu_guest_context(struct vcpu_guest_context *);
 #endif
 
+/* Allocate/free a PIRQ structure. */
+#ifndef alloc_pirq_struct
+struct pirq *alloc_pirq_struct(struct domain *);
+#endif
+void free_pirq_struct(void *);
+
 /*
  * Initialise/destroy arch-specific details of a VCPU.
  *  - vcpu_initialise() is called after the basic generic fields of the
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/xen/event.h
--- a/xen/include/xen/event.h   Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/xen/event.h   Thu Jun 23 18:34:55 2011 +0100
@@ -36,7 +36,7 @@
  *  @pirq:     Physical IRQ number
  * Returns TRUE if the delivery port was already pending.
  */
-int send_guest_pirq(struct domain *d, int pirq);
+int send_guest_pirq(struct domain *, const struct pirq *);
 
 /* Send a notification from a given domain's event-channel port. */
 int evtchn_send(struct domain *d, unsigned int lport);
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/xen/hvm/irq.h
--- a/xen/include/xen/hvm/irq.h Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/xen/hvm/irq.h Thu Jun 23 18:34:55 2011 +0100
@@ -25,7 +25,7 @@
 #include <xen/types.h>
 #include <xen/spinlock.h>
 #include <xen/tasklet.h>
-#include <asm/irq.h>
+#include <xen/timer.h>
 #include <public/hvm/save.h>
 
 struct dev_intx_gsi_link {
@@ -38,11 +38,15 @@
 
 #define _HVM_IRQ_DPCI_MACH_PCI_SHIFT            0
 #define _HVM_IRQ_DPCI_MACH_MSI_SHIFT            1
+#define _HVM_IRQ_DPCI_MAPPED_SHIFT              2
+#define _HVM_IRQ_DPCI_EOI_LATCH_SHIFT           3
 #define _HVM_IRQ_DPCI_GUEST_PCI_SHIFT           4
 #define _HVM_IRQ_DPCI_GUEST_MSI_SHIFT           5
 #define _HVM_IRQ_DPCI_TRANSLATE_SHIFT          15
 #define HVM_IRQ_DPCI_MACH_PCI        (1 << _HVM_IRQ_DPCI_MACH_PCI_SHIFT)
 #define HVM_IRQ_DPCI_MACH_MSI        (1 << _HVM_IRQ_DPCI_MACH_MSI_SHIFT)
+#define HVM_IRQ_DPCI_MAPPED          (1 << _HVM_IRQ_DPCI_MAPPED_SHIFT)
+#define HVM_IRQ_DPCI_EOI_LATCH       (1 << _HVM_IRQ_DPCI_EOI_LATCH_SHIFT)
 #define HVM_IRQ_DPCI_GUEST_PCI       (1 << _HVM_IRQ_DPCI_GUEST_PCI_SHIFT)
 #define HVM_IRQ_DPCI_GUEST_MSI       (1 << _HVM_IRQ_DPCI_GUEST_MSI_SHIFT)
 #define HVM_IRQ_DPCI_TRANSLATE       (1 << _HVM_IRQ_DPCI_TRANSLATE_SHIFT)
@@ -63,14 +67,6 @@
     int dest_vcpu_id; /* -1 :multi-dest, non-negative: dest_vcpu_id */
 };
 
-struct hvm_mirq_dpci_mapping {
-    uint32_t flags;
-    int pending;
-    struct list_head digl_list;
-    struct domain *dom;
-    struct hvm_gmsi_info gmsi;
-};
-
 struct hvm_girq_dpci_mapping {
     struct list_head list;
     uint8_t device;
@@ -88,20 +84,33 @@
 
 /* Protected by domain's event_lock */
 struct hvm_irq_dpci {
-    /* Machine IRQ to guest device/intx mapping. */
-    unsigned long *mapping;
-    struct hvm_mirq_dpci_mapping *mirq;
-    unsigned long *dirq_mask;
     /* Guest IRQ to guest device/intx mapping. */
     struct list_head girq[NR_HVM_IRQS];
     /* Record of mapped ISA IRQs */
     DECLARE_BITMAP(isairq_map, NR_ISAIRQS);
     /* Record of mapped Links */
     uint8_t link_cnt[NR_LINK];
-    struct timer *hvm_timer;
     struct tasklet dirq_tasklet;
 };
 
+/* Machine IRQ to guest device/intx mapping. */
+struct hvm_pirq_dpci {
+    uint32_t flags;
+    bool_t masked;
+    uint16_t pending;
+    struct list_head digl_list;
+    struct domain *dom;
+    struct hvm_gmsi_info gmsi;
+    struct timer timer;
+};
+
+void pt_pirq_init(struct domain *, struct hvm_pirq_dpci *);
+bool_t pt_pirq_cleanup_check(struct hvm_pirq_dpci *);
+int pt_pirq_iterate(struct domain *d,
+                    int (*cb)(struct domain *,
+                              struct hvm_pirq_dpci *, void *arg),
+                    void *arg);
+
 /* Modify state of a PCI INTx wire. */
 void hvm_pci_intx_assert(
     struct domain *d, unsigned int device, unsigned int intx);
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/xen/iommu.h   Thu Jun 23 18:34:55 2011 +0100
@@ -31,7 +31,7 @@
 extern bool_t iommu_workaround_bios_bug, iommu_passthrough;
 extern bool_t iommu_snoop, iommu_qinval, iommu_intremap;
 extern bool_t iommu_hap_pt_share;
-extern bool_t amd_iommu_debug;
+extern bool_t iommu_debug;
 extern bool_t amd_iommu_perdev_intremap;
 
 extern struct rangeset *mmio_ro_ranges;
@@ -88,7 +88,9 @@
 void iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte, int order, int 
present);
 void iommu_set_pgd(struct domain *d);
 void iommu_domain_teardown(struct domain *d);
-int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq);
+
+struct pirq;
+int hvm_do_IRQ_dpci(struct domain *, struct pirq *);
 int dpci_ioport_intercept(ioreq_t *p);
 int pt_irq_create_bind_vtd(struct domain *d,
                            xen_domctl_bind_pt_irq_t *pt_irq_bind);
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/xen/irq.h
--- a/xen/include/xen/irq.h     Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/xen/irq.h     Thu Jun 23 18:34:55 2011 +0100
@@ -3,6 +3,7 @@
 
 #include <xen/config.h>
 #include <xen/cpumask.h>
+#include <xen/rcupdate.h>
 #include <xen/spinlock.h>
 #include <xen/time.h>
 #include <xen/list.h>
@@ -135,13 +136,42 @@
 
 struct domain;
 struct vcpu;
-extern int pirq_guest_eoi(struct domain *d, int irq);
+
+struct pirq {
+    int pirq;
+    u16 evtchn;
+    bool_t masked;
+    struct rcu_head rcu_head;
+    struct arch_pirq arch;
+};
+
+#define pirq_info(d, p) ((struct pirq *)radix_tree_lookup(&(d)->pirq_tree, p))
+
+/* Use this instead of pirq_info() if the structure may need allocating. */
+extern struct pirq *pirq_get_info(struct domain *, int pirq);
+
+#define pirq_field(d, p, f) ({ \
+    const struct pirq *__pi = pirq_info(d, p); \
+    __pi ? __pi->f : 0; \
+})
+#define pirq_to_evtchn(d, pirq) pirq_field(d, pirq, evtchn)
+#define pirq_masked(d, pirq) pirq_field(d, pirq, masked)
+
+void pirq_cleanup_check(struct pirq *, struct domain *);
+
+#define pirq_cleanup_check(pirq, d) \
+    ((pirq)->evtchn ? pirq_cleanup_check(pirq, d) : (void)0)
+
+extern void pirq_guest_eoi(struct domain *, struct pirq *);
+extern void desc_guest_eoi(struct domain *, struct irq_desc *, struct pirq *);
 extern int pirq_guest_unmask(struct domain *d);
-extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
-extern void pirq_guest_unbind(struct domain *d, int irq);
+extern int pirq_guest_bind(struct vcpu *, struct pirq *, int will_share);
+extern void pirq_guest_unbind(struct domain *d, struct pirq *);
 extern void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *);
 extern irq_desc_t *domain_spin_lock_irq_desc(
     struct domain *d, int irq, unsigned long *pflags);
+extern irq_desc_t *pirq_spin_lock_irq_desc(
+    struct domain *, const struct pirq *, unsigned long *pflags);
 
 static inline void set_native_irq_info(unsigned int irq, const cpumask_t *mask)
 {
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/xen/pci.h
--- a/xen/include/xen/pci.h     Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/xen/pci.h     Thu Jun 23 18:34:55 2011 +0100
@@ -117,8 +117,9 @@
 int pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap);
 int pci_find_ext_capability(int seg, int bus, int devfn, int cap);
 
-int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable);
-void msixtbl_pt_unregister(struct domain *d, int pirq);
+struct pirq;
+int msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable);
+void msixtbl_pt_unregister(struct domain *, struct pirq *);
 void msixtbl_pt_cleanup(struct domain *d);
 void pci_enable_acs(struct pci_dev *pdev);
 
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/xen/preempt.h
--- a/xen/include/xen/preempt.h Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/xen/preempt.h Thu Jun 23 18:34:55 2011 +0100
@@ -10,9 +10,8 @@
 #define __XEN_PREEMPT_H__
 
 #include <xen/config.h>
+#include <xen/types.h>
 #include <xen/percpu.h>
-#include <xen/irq.h>    /* in_irq() */
-#include <asm/system.h> /* local_irq_is_enabled() */
 
 DECLARE_PER_CPU(unsigned int, __preempt_count);
 
@@ -28,6 +27,6 @@
     preempt_count()--;                          \
 } while (0)
 
-#define in_atomic() (preempt_count() || in_irq() || !local_irq_is_enabled())
+bool_t in_atomic(void);
 
 #endif /* __XEN_PREEMPT_H__ */
diff -r 3dcb553f3ba9 -r b24018319772 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Fri Jun 10 10:47:29 2011 +0200
+++ b/xen/include/xen/sched.h   Thu Jun 23 18:34:55 2011 +0100
@@ -12,6 +12,7 @@
 #include <xen/rcupdate.h>
 #include <xen/cpumask.h>
 #include <xen/nodemask.h>
+#include <xen/radix-tree.h>
 #include <xen/multicall.h>
 #include <public/xen.h>
 #include <public/domctl.h>
@@ -227,13 +228,11 @@
     struct grant_table *grant_table;
 
     /*
-     * Interrupt to event-channel mappings. Updates should be protected by the 
-     * domain's event-channel spinlock. Read accesses can also synchronise on 
-     * the lock, but races don't usually matter.
+     * Interrupt to event-channel mappings and other per-guest-pirq data.
+     * Protected by the domain's event-channel spinlock.
      */
     unsigned int     nr_pirqs;
-    u16             *pirq_to_evtchn;
-    unsigned long   *pirq_mask;
+    struct radix_tree_root pirq_tree;
 
     /* I/O capabilities (access to IRQs and memory-mapped I/O). */
     struct rangeset *iomem_caps;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.