[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] MSI 3/6: add msi support to xen



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1209634289 -3600
# Node ID 86c0353f19d03ec79d352074a54d94863d2dac4c
# Parent  6ecbb00e58cd891fb3c26455bb096ed5fec0b0aa
MSI 3/6: add msi support to xen

Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Shan Haitao <haitao.shan@xxxxxxxxx>
---
 xen/drivers/passthrough/vtd/msi.h            |  127 ----
 xen/arch/x86/Makefile                        |    1 
 xen/arch/x86/i8259.c                         |    7 
 xen/arch/x86/io_apic.c                       |   58 +
 xen/arch/x86/irq.c                           |    4 
 xen/arch/x86/msi.c                           |  787 +++++++++++++++++++++++++++
 xen/arch/x86/physdev.c                       |   60 +-
 xen/drivers/passthrough/amd/iommu_init.c     |    3 
 xen/drivers/passthrough/vtd/intremap.c       |    2 
 xen/drivers/passthrough/vtd/iommu.c          |    2 
 xen/drivers/passthrough/vtd/qinval.c         |    2 
 xen/drivers/passthrough/vtd/utils.c          |    2 
 xen/include/asm-x86/fixmap.h                 |    3 
 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h |   29 
 xen/include/asm-x86/msi.h                    |  210 +++++++
 xen/include/public/physdev.h                 |    5 
 xen/include/xen/iommu.h                      |    1 
 xen/include/xen/irq.h                        |    2 
 xen/include/xen/pci.h                        |    2 
 19 files changed, 1138 insertions(+), 169 deletions(-)

diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Thu May 01 10:30:22 2008 +0100
+++ b/xen/arch/x86/Makefile     Thu May 01 10:31:29 2008 +0100
@@ -24,6 +24,7 @@ obj-y += i387.o
 obj-y += i387.o
 obj-y += i8259.o
 obj-y += io_apic.o
+obj-y += msi.o
 obj-y += ioport_emulate.o
 obj-y += irq.o
 obj-y += microcode.o
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c      Thu May 01 10:30:22 2008 +0100
+++ b/xen/arch/x86/i8259.c      Thu May 01 10:31:29 2008 +0100
@@ -382,6 +382,7 @@ void __devinit init_8259A(int auto_eoi)
 
 static struct irqaction cascade = { no_action, "cascade", NULL};
 
+extern struct list_head msi_pdev_list;
 void __init init_IRQ(void)
 {
     int i;
@@ -418,5 +419,7 @@ void __init init_IRQ(void)
     outb(LATCH >> 8, PIT_CH0);     /* MSB */
 
     setup_irq(2, &cascade);
-}
-
+
+    INIT_LIST_HEAD(&msi_pdev_list);
+}
+
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c    Thu May 01 10:30:22 2008 +0100
+++ b/xen/arch/x86/io_apic.c    Thu May 01 10:31:29 2008 +0100
@@ -27,13 +27,17 @@
 #include <xen/delay.h>
 #include <xen/sched.h>
 #include <xen/acpi.h>
+#include <xen/pci.h>
+#include <xen/pci_regs.h>
 #include <xen/keyhandler.h>
 #include <asm/io.h>
 #include <asm/mc146818rtc.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
+#include <asm/msi.h>
 #include <mach_apic.h>
 #include <io_ports.h>
+#include <public/physdev.h>
 
 /* Different to Linux: our implementation can be simpler. */
 #define make_8259A_irq(irq) (io_apic_irqs &= ~(1<<(irq)))
@@ -726,6 +730,7 @@ next:
 
 static struct hw_interrupt_type ioapic_level_type;
 static struct hw_interrupt_type ioapic_edge_type;
+struct hw_interrupt_type pci_msi_type;
 
 #define IOAPIC_AUTO    -1
 #define IOAPIC_EDGE    0
@@ -1554,6 +1559,59 @@ static struct hw_interrupt_type ioapic_l
     .set_affinity      = set_ioapic_affinity_vector,
 };
 
+/* 'disable' hook of pci_msi_type: forwards to mask_msi_irq(). */
+static void mask_msi_vector(unsigned int vector)
+{
+    mask_msi_irq(vector);
+}
+
+/* 'enable' hook of pci_msi_type: forwards to unmask_msi_irq(). */
+static void unmask_msi_vector(unsigned int vector)
+{
+    unmask_msi_irq(vector);
+}
+
+/*
+ * 'startup' hook of pci_msi_type: logs the vector and unmasks it.
+ * Always returns 0.
+ */
+static unsigned int startup_msi_vector(unsigned int vector)
+{
+    dprintk(XENLOG_INFO, "startup msi vector %x\n", vector);
+    unmask_msi_irq(vector);
+    return 0;
+}
+
+/*
+ * 'ack' hook of pci_msi_type: acknowledges the interrupt via ack_APIC_irq().
+ * @vector is not used.
+ */
+static void ack_msi_vector(unsigned int vector)
+{
+    ack_APIC_irq();
+}
+
+/* 'end' hook of pci_msi_type: intentionally empty, no work is done here. */
+static void end_msi_vector(unsigned int vector)
+{
+}
+
+/* 'shutdown' hook of pci_msi_type: logs the vector and masks it. */
+static void shutdown_msi_vector(unsigned int vector)
+{
+    dprintk(XENLOG_INFO, "shutdown msi vector %x\n", vector);
+    mask_msi_irq(vector);
+}
+
+/*
+ * 'set_affinity' hook of pci_msi_type: records the new mask with
+ * set_native_irq_info() and then retargets the MSI message through
+ * set_msi_irq_affinity().
+ */
+static void set_msi_affinity_vector(unsigned int vector, cpumask_t cpu_mask)
+{
+    set_native_irq_info(vector, cpu_mask);
+    set_msi_irq_affinity(vector, cpu_mask);
+}
+
+/*
+ * Interrupt-controller operations for PCI/PCI-X/PCI-Express devices that
+ * signal through an MSI or MSI-X capability structure.
+ */
+struct hw_interrupt_type pci_msi_type = {
+    .typename     = "PCI-MSI",
+    .startup      = startup_msi_vector,
+    .shutdown     = shutdown_msi_vector,
+    .enable       = unmask_msi_vector,
+    .disable      = mask_msi_vector,
+    .ack          = ack_msi_vector,
+    .end          = end_msi_vector,
+    .set_affinity = set_msi_affinity_vector,
+};
+
 static inline void init_IO_APIC_traps(void)
 {
     int irq;
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Thu May 01 10:30:22 2008 +0100
+++ b/xen/arch/x86/irq.c        Thu May 01 10:31:29 2008 +0100
@@ -395,9 +395,11 @@ int pirq_acktype(struct domain *d, int i
     /*
      * Edge-triggered IO-APIC and LAPIC interrupts need no final
      * acknowledgement: we ACK early during interrupt processing.
+     * MSIs are treated as edge-triggered interrupts.
      */
     if ( !strcmp(desc->handler->typename, "IO-APIC-edge") ||
-         !strcmp(desc->handler->typename, "local-APIC-edge") )
+         !strcmp(desc->handler->typename, "local-APIC-edge") ||
+         !strcmp(desc->handler->typename, "PCI-MSI") )
         return ACKTYPE_NONE;
 
     /*
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/arch/x86/msi.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/msi.c        Thu May 01 10:31:29 2008 +0100
@@ -0,0 +1,787 @@
+/*
+ * File:    msi.c
+ * Purpose: PCI Message Signaled Interrupt (MSI)
+ *
+ * Copyright (C) 2003-2004 Intel
+ * Copyright (C) Tom Long Nguyen (tom.l.nguyen@xxxxxxxxx)
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/irq.h>
+#include <xen/delay.h>
+#include <xen/sched.h>
+#include <xen/acpi.h>
+#include <xen/errno.h>
+#include <xen/pci.h>
+#include <xen/pci_regs.h>
+#include <xen/keyhandler.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/desc.h>
+#include <asm/msi.h>
+#include <asm/fixmap.h>
+#include <mach_apic.h>
+#include <io_ports.h>
+#include <public/physdev.h>
+
+extern int msi_irq_enable;
+
+/* PCI-dev list with MSI/MSIX capabilities */
+DEFINE_SPINLOCK(msi_pdev_lock);
+struct list_head msi_pdev_list;
+
+/*
+ * Look up the entry of msi_pdev_list whose bus and devfn match the
+ * arguments.  Returns the matching pci_dev, or NULL when there is none.
+ */
+struct pci_dev *get_msi_pdev(u8 bus, u8 devfn)
+{
+    struct pci_dev *cur;
+
+    list_for_each_entry(cur, &msi_pdev_list, msi_dev_list)
+    {
+        if ( cur->bus != bus || cur->devfn != devfn )
+            continue;
+
+        return cur;
+    }
+
+    return NULL;
+}
+
+/* Bitmap of MSI-X fixmap pages: a set bit marks a page that is in use. */
+DEFINE_SPINLOCK(msix_fixmap_lock);
+DECLARE_BITMAP(msix_fixmap_pages, MAX_MSIX_PAGES);
+
+/*
+ * Reserve one MSI-X fixmap page.
+ *
+ * Under msix_fixmap_lock, finds the first clear bit in msix_fixmap_pages,
+ * sets it, and returns the matching fixmap index
+ * (FIX_MSIX_IO_RESERV_BASE + bit).  Returns -1 when all MAX_MSIX_PAGES
+ * pages are taken.
+ */
+static int msix_fixmap_alloc(void)
+{
+    int slot;
+    int fixmap_idx = -1;
+
+    spin_lock(&msix_fixmap_lock);
+    for ( slot = 0; slot < MAX_MSIX_PAGES; slot++ )
+    {
+        if ( test_bit(slot, &msix_fixmap_pages) )
+            continue;
+
+        fixmap_idx = FIX_MSIX_IO_RESERV_BASE + slot;
+        set_bit(slot, &msix_fixmap_pages);
+        break;
+    }
+    spin_unlock(&msix_fixmap_lock);
+
+    return fixmap_idx;
+}
+
+/*
+ * Release a fixmap page previously returned by msix_fixmap_alloc().
+ *
+ * @idx: fixmap index.  Values outside
+ *       [FIX_MSIX_IO_RESERV_BASE, FIX_MSIX_IO_RESERV_BASE + MAX_MSIX_PAGES)
+ *       are ignored, so a stray index can never clear a bit beyond the
+ *       msix_fixmap_pages bitmap.
+ */
+static void msix_fixmap_free(int idx)
+{
+    if ( idx < FIX_MSIX_IO_RESERV_BASE ||
+         idx >= FIX_MSIX_IO_RESERV_BASE + MAX_MSIX_PAGES )
+        return;
+
+    spin_lock(&msix_fixmap_lock);
+    clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
+    spin_unlock(&msix_fixmap_lock);
+}
+
+/*
+ * MSI message composition
+ */
+/*
+ * Fill @msg with the MSI address/data pair that delivers @vector.
+ *
+ * The destination APIC ID is derived from TARGET_CPUS; the destination and
+ * delivery modes follow INT_DEST_MODE and INT_DELIVERY_MODE.  @msg is left
+ * untouched when @vector is 0.  @pdev is not used.
+ */
+static void msi_compose_msg(struct pci_dev *pdev, int vector,
+                            struct msi_msg *msg)
+{
+    cpumask_t cpus = TARGET_CPUS;
+    unsigned int apicid;
+    int fixed_delivery = (INT_DELIVERY_MODE != dest_LowestPrio);
+
+    if ( !vector )
+        return;
+
+    apicid = cpu_mask_to_apicid(cpus);
+
+    msg->address_hi = MSI_ADDR_BASE_HI;
+
+    msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_DEST_ID(apicid);
+    if ( INT_DEST_MODE == 0 )
+        msg->address_lo |= MSI_ADDR_DESTMODE_PHYS;
+    else
+        msg->address_lo |= MSI_ADDR_DESTMODE_LOGIC;
+    if ( fixed_delivery )
+        msg->address_lo |= MSI_ADDR_REDIRECTION_CPU;
+    else
+        msg->address_lo |= MSI_ADDR_REDIRECTION_LOWPRI;
+
+    msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT |
+                MSI_DATA_VECTOR(vector);
+    if ( fixed_delivery )
+        msg->data |= MSI_DATA_DELIVERY_FIXED;
+    else
+        msg->data |= MSI_DATA_DELIVERY_LOWPRI;
+}
+
+/*
+ * Read back the message currently programmed for @irq into @msg.
+ *
+ * For MSI the address/data registers are read from PCI config space; for
+ * MSI-X they are read from the entry's slot in the mapped table.  Any other
+ * capability type hits BUG().
+ */
+void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+{
+    struct msi_desc *desc = irq_desc[irq].msi_desc;
+
+    if ( desc->msi_attrib.type == PCI_CAP_ID_MSI )
+    {
+        struct pci_dev *pdev = desc->dev;
+        int cap = desc->msi_attrib.pos;
+        u8 bus = pdev->bus;
+        u8 slot = PCI_SLOT(pdev->devfn);
+        u8 func = PCI_FUNC(pdev->devfn);
+        u16 data;
+
+        msg->address_lo = pci_conf_read32(bus, slot, func,
+                                          msi_lower_address_reg(cap));
+        if ( !desc->msi_attrib.is_64 )
+        {
+            /* 32-bit capability: no upper address register. */
+            msg->address_hi = 0;
+            data = pci_conf_read16(bus, slot, func, msi_data_reg(cap, 0));
+        }
+        else
+        {
+            msg->address_hi = pci_conf_read32(bus, slot, func,
+                                              msi_upper_address_reg(cap));
+            data = pci_conf_read16(bus, slot, func, msi_data_reg(cap, 1));
+        }
+        msg->data = data;
+    }
+    else if ( desc->msi_attrib.type == PCI_CAP_ID_MSIX )
+    {
+        void __iomem *slot_base = desc->mask_base +
+            desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+
+        msg->address_lo = readl(slot_base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
+        msg->address_hi = readl(slot_base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
+        msg->data = readl(slot_base + PCI_MSIX_ENTRY_DATA_OFFSET);
+    }
+    else
+        BUG();
+}
+
+/*
+ * Attach @entry to irq_desc[entry->vector] under that descriptor's lock.
+ * Returns 0 on success, or -EINVAL (after logging) when the vector is not
+ * below NR_VECTORS.
+ */
+static int set_vector_msi(struct msi_desc *entry)
+{
+    unsigned long flags;
+    irq_desc_t *slot;
+
+    if ( entry->vector < NR_VECTORS )
+    {
+        slot = &irq_desc[entry->vector];
+
+        spin_lock_irqsave(&slot->lock, flags);
+        slot->msi_desc = entry;
+        spin_unlock_irqrestore(&slot->lock, flags);
+
+        return 0;
+    }
+
+    dprintk(XENLOG_ERR, "Trying to install msi data for Vector %d\n",
+            entry->vector);
+    return -EINVAL;
+}
+
+/*
+ * Detach whatever MSI descriptor is installed in irq_desc[vector].
+ *
+ * Returns 0 on success, or -EINVAL (after logging) when @vector lies outside
+ * [0, NR_VECTORS).  The lower bound matters because @vector is a signed int:
+ * a negative value would otherwise index in front of irq_desc[].
+ */
+static int unset_vector_msi(int vector)
+{
+    irq_desc_t *desc;
+    unsigned long flags;
+
+    if ( vector < 0 || vector >= NR_VECTORS )
+    {
+        dprintk(XENLOG_ERR, "Trying to uninstall msi data for Vector %d\n",
+                vector);
+        return -EINVAL;
+    }
+
+    desc = &irq_desc[vector];
+    spin_lock_irqsave(&desc->lock, flags);
+    desc->msi_desc = NULL;
+    spin_unlock_irqrestore(&desc->lock, flags);
+
+    return 0;
+}
+
+/*
+ * Program @msg into the device behind @irq and cache it in entry->msg.
+ *
+ * MSI: writes the lower address, then (64-bit capable devices only) the
+ * upper address, then the data register, all through PCI config space.
+ * MSI-X: writes lower address, upper address and data into the entry's slot
+ * of the mapped table.  Any other capability type hits BUG().
+ *
+ * The descriptor is taken from irq_desc[irq].msi_desc and is dereferenced
+ * without a NULL check.
+ */
+void write_msi_msg(unsigned int irq, struct msi_msg *msg)
+{
+    struct msi_desc *entry = irq_desc[irq].msi_desc;
+
+    switch ( entry->msi_attrib.type )
+    {
+    case PCI_CAP_ID_MSI:
+    {
+        struct pci_dev *dev = entry->dev;
+        int pos = entry->msi_attrib.pos;
+        u8 bus = dev->bus;
+        u8 slot = PCI_SLOT(dev->devfn);
+        u8 func = PCI_FUNC(dev->devfn);
+
+        pci_conf_write32(bus, slot, func, msi_lower_address_reg(pos),
+                         msg->address_lo);
+        if ( entry->msi_attrib.is_64 )
+        {
+            pci_conf_write32(bus, slot, func, msi_upper_address_reg(pos),
+                             msg->address_hi);
+            pci_conf_write16(bus, slot, func, msi_data_reg(pos, 1),
+                             msg->data);
+        }
+        else
+            /* 32-bit capability: address_hi is not written. */
+            pci_conf_write16(bus, slot, func, msi_data_reg(pos, 0),
+                             msg->data);
+        break;
+    }
+    case PCI_CAP_ID_MSIX:
+    {
+        void __iomem *base;
+        /* Start of this entry's slot inside the mapped MSI-X table. */
+        base = entry->mask_base +
+            entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+
+        writel(msg->address_lo,
+            base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
+        writel(msg->address_hi,
+            base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
+        writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
+        break;
+    }
+    default:
+        BUG();
+    }
+    /* Keep a software copy of the message that was just programmed. */
+    entry->msg = *msg;
+}
+
+/*
+ * Retarget the MSI behind @irq at the CPUs in @mask.
+ *
+ * @mask is first restricted to cpu_online_map; if nothing is left,
+ * TARGET_CPUS is used instead.  Only the destination-ID field of the
+ * message address is rewritten, the rest of the message is read back and
+ * written out again unchanged.
+ */
+void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+    struct msi_msg cur;
+    unsigned int apicid;
+
+    memset(&cur, 0, sizeof(cur));
+
+    cpus_and(mask, mask, cpu_online_map);
+    if ( cpus_empty(mask) )
+        mask = TARGET_CPUS;
+    apicid = cpu_mask_to_apicid(mask);
+
+    read_msi_msg(irq, &cur);
+    cur.address_lo = (cur.address_lo & ~MSI_ADDR_DEST_ID_MASK) |
+                     MSI_ADDR_DEST_ID(apicid);
+    write_msi_msg(irq, &cur);
+}
+
+/*
+ * Set (@enable != 0) or clear (@enable == 0) the MSI enable bit in the
+ * device's MSI capability.  Does nothing when the device has no MSI
+ * capability.
+ */
+static void msi_set_enable(struct pci_dev *dev, int enable)
+{
+    u8 bus = dev->bus;
+    u8 slot = PCI_SLOT(dev->devfn);
+    u8 func = PCI_FUNC(dev->devfn);
+    int cap = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSI);
+    u16 flags;
+
+    if ( !cap )
+        return;
+
+    flags = pci_conf_read16(bus, slot, func, cap + PCI_MSI_FLAGS);
+    if ( enable )
+        flags |= PCI_MSI_FLAGS_ENABLE;
+    else
+        flags &= ~PCI_MSI_FLAGS_ENABLE;
+    pci_conf_write16(bus, slot, func, cap + PCI_MSI_FLAGS, flags);
+}
+
+/*
+ * Set (@enable != 0) or clear (@enable == 0) the MSI-X enable bit in the
+ * device's MSI-X capability.  Does nothing when the device has no MSI-X
+ * capability.
+ */
+void msix_set_enable(struct pci_dev *dev, int enable)
+{
+    u8 bus = dev->bus;
+    u8 slot = PCI_SLOT(dev->devfn);
+    u8 func = PCI_FUNC(dev->devfn);
+    int cap = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
+    u16 flags;
+
+    if ( !cap )
+        return;
+
+    flags = pci_conf_read16(bus, slot, func, cap + PCI_MSIX_FLAGS);
+    if ( enable )
+        flags |= PCI_MSIX_FLAGS_ENABLE;
+    else
+        flags &= ~PCI_MSIX_FLAGS_ENABLE;
+    pci_conf_write16(bus, slot, func, cap + PCI_MSIX_FLAGS, flags);
+}
+
+/*
+ * Flush earlier writes to an MSI-X entry by reading back its vector-control
+ * word.  Nothing is done for plain MSI; any other capability type hits
+ * BUG().
+ */
+static void msix_flush_writes(unsigned int irq)
+{
+    struct msi_desc *desc = irq_desc[irq].msi_desc;
+
+    BUG_ON(!desc || !desc->dev);
+
+    if ( desc->msi_attrib.type == PCI_CAP_ID_MSIX )
+    {
+        int ctrl_off = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+            PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+
+        readl(desc->mask_base + ctrl_off);
+    }
+    else if ( desc->msi_attrib.type != PCI_CAP_ID_MSI )
+        BUG();
+}
+
+/*
+ * Mask (@flag == 1) or unmask (@flag == 0) the MSI/MSI-X source behind @irq
+ * and record the new state in entry->msi_attrib.masked.
+ *
+ * MSI with per-vector masking: bit 0 of the mask register is rewritten.
+ * MSI without masking support: the whole capability is disabled/enabled.
+ * MSI-X: @flag is written to the entry's vector-control word and read back.
+ * Any other capability type hits BUG().
+ */
+static void msi_set_mask_bit(unsigned int irq, int flag)
+{
+    struct msi_desc *entry = irq_desc[irq].msi_desc;
+
+    BUG_ON(!entry || !entry->dev);
+    switch (entry->msi_attrib.type) {
+    case PCI_CAP_ID_MSI:
+        if (entry->msi_attrib.maskbit) {
+            int pos;
+            u32 mask_bits;
+            u8 bus = entry->dev->bus;
+            u8 slot = PCI_SLOT(entry->dev->devfn);
+            u8 func = PCI_FUNC(entry->dev->devfn);
+
+            /*
+             * For MSI, mask_base holds the config-space offset of the mask
+             * register (stored by msi_capability_init), not a pointer.
+             */
+            pos = (long)entry->mask_base;
+            mask_bits = pci_conf_read32(bus, slot, func, pos);
+            mask_bits &= ~(1);
+            mask_bits |= flag;
+            pci_conf_write32(bus, slot, func, pos, mask_bits);
+        } else {
+            /* No mask register: toggle the MSI enable bit instead. */
+            msi_set_enable(entry->dev, !flag);
+        }
+        break;
+    case PCI_CAP_ID_MSIX:
+    {
+        int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+            PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+        writel(flag, entry->mask_base + offset);
+        /* Read back the word that was just written. */
+        readl(entry->mask_base + offset);
+        break;
+    }
+    default:
+        BUG();
+        break;
+    }
+    entry->msi_attrib.masked = !!flag;
+}
+
+/* Mask the MSI/MSI-X source behind @irq, then flush the write. */
+void mask_msi_irq(unsigned int irq)
+{
+    msi_set_mask_bit(irq, 1);
+    msix_flush_writes(irq);
+}
+
+/* Unmask the MSI/MSI-X source behind @irq, then flush the write. */
+void unmask_msi_irq(unsigned int irq)
+{
+    msi_set_mask_bit(irq, 0);
+    msix_flush_writes(irq);
+}
+
+/*
+ * Allocate an MSI descriptor with an initialised list head and a NULL dev
+ * pointer.  All other fields are left as returned by xmalloc().  Returns
+ * NULL when the allocation fails.
+ */
+static struct msi_desc *alloc_msi_entry(void)
+{
+    struct msi_desc *desc = xmalloc(struct msi_desc);
+
+    if ( desc )
+    {
+        INIT_LIST_HEAD(&desc->list);
+        desc->dev = NULL;
+    }
+
+    return desc;
+}
+
+/*
+ * Compose the message for desc->vector, install @desc in irq_desc[] and
+ * program the message into the device.
+ *
+ * Returns 0 on success, or the error from set_vector_msi() (-EINVAL for an
+ * out-of-range vector).  In the error case nothing is written to the
+ * device: write_msi_msg() looks the descriptor up through
+ * irq_desc[desc->vector], so it must not run unless the install succeeded.
+ */
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
+    struct msi_msg msg;
+    int rc;
+
+    /*
+     * msi_compose_msg() leaves the message untouched for vector 0; start
+     * from zeroes so stack garbage is never programmed into the device.
+     */
+    memset(&msg, 0, sizeof(msg));
+    msi_compose_msg(dev, desc->vector, &msg);
+
+    rc = set_vector_msi(desc);
+    if ( rc )
+        return rc;
+
+    write_msi_msg(desc->vector, &msg);
+
+    return 0;
+}
+
+/*
+ * Detach the MSI descriptor from irq_desc[vector].  The result of
+ * unset_vector_msi() is discarded.
+ */
+static void teardown_msi_vector(int vector)
+{
+    unset_vector_msi(vector);
+}
+
+/*
+ * Release the MSI descriptor installed for @vector.
+ *
+ * The descriptor is detached from irq_desc[], and for MSI-X the table entry
+ * is masked and the fixmap page backing the table is released and unmapped.
+ * Finally the descriptor is unlinked from its device list and freed.  The
+ * descriptor is dereferenced without a NULL check.
+ */
+static void msi_free_vector(int vector)
+{
+    struct msi_desc *desc = irq_desc[vector].msi_desc;
+
+    teardown_msi_vector(vector);
+
+    if ( desc->msi_attrib.type == PCI_CAP_ID_MSIX )
+    {
+        void __iomem *ctrl = desc->mask_base +
+            desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+            PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+        unsigned long page = (unsigned long)desc->mask_base & ~(PAGE_SIZE - 1);
+
+        /* Mask the entry before its mapping goes away. */
+        writel(1, ctrl);
+
+        msix_fixmap_free(virt_to_fix(page));
+        destroy_xen_mappings(page, page + PAGE_SIZE);
+    }
+
+    list_del(&desc->list);
+    xfree(desc);
+}
+
+/*
+ * Free every MSI descriptor linked on dev->msi_list.  The _safe iterator is
+ * required because msi_free_vector() unlinks and frees the current entry.
+ */
+void msi_free_vectors(struct pci_dev *dev)
+{
+    struct msi_desc *cur, *next;
+
+    list_for_each_entry_safe( cur, next, &dev->msi_list, list )
+    {
+        msi_free_vector(cur->vector);
+    }
+}
+
+/*
+ * Return the first descriptor on dev->msi_list whose capability type equals
+ * @cap_id and whose vector equals @vector.  A @vector of -1 matches any
+ * vector.  Returns NULL when nothing matches.
+ */
+static struct msi_desc *find_msi_entry(struct pci_dev *dev,
+                                       int vector, int cap_id)
+{
+    struct msi_desc *cur;
+
+    list_for_each_entry( cur, &dev->msi_list, list )
+    {
+        if ( cur->msi_attrib.type != cap_id )
+            continue;
+        if ( vector != -1 && cur->vector != vector )
+            continue;
+
+        return cur;
+    }
+
+    return NULL;
+}
+
+/**
+ * msi_capability_init - configure device's MSI capability structure
+ * @dev: pointer to the pci_dev data structure of MSI device function
+ * @vector: interrupt vector the device's MSI message should deliver
+ *
+ * Setup the MSI capability structure of device function with a single
+ * MSI irq, regardless of device function is capable of handling
+ * multiple messages. A return of zero indicates the successful setup
+ * of an entry zero with the new MSI irq or non-zero for otherwise:
+ * -ENODEV when the function has no MSI capability, -ENOMEM when the
+ * descriptor cannot be allocated, or the error from setup_msi_irq().
+ **/
+static int msi_capability_init(struct pci_dev *dev, int vector)
+{
+    struct msi_desc *entry;
+    int pos, ret;
+    u16 control;
+    u8 bus = dev->bus;
+    u8 slot = PCI_SLOT(dev->devfn);
+    u8 func = PCI_FUNC(dev->devfn);
+
+    pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSI);
+    if ( !pos )
+        /* No MSI capability: do not touch config space at a bogus offset. */
+        return -ENODEV;
+
+    control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
+    /* MSI Entry Initialization */
+    msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
+
+    entry = alloc_msi_entry();
+    if ( !entry )
+        return -ENOMEM;
+
+    entry->msi_attrib.type = PCI_CAP_ID_MSI;
+    entry->msi_attrib.is_64 = is_64bit_address(control);
+    entry->msi_attrib.entry_nr = 0;
+    entry->msi_attrib.maskbit = is_mask_bit_support(control);
+    entry->msi_attrib.masked = 1;
+    entry->msi_attrib.pos = pos;
+    entry->vector = vector;
+    /*
+     * For MSI, mask_base carries the config-space offset of the mask
+     * register rather than a pointer; leave it NULL when the device has no
+     * mask register so it never holds an uninitialised value.
+     */
+    entry->mask_base = NULL;
+    if ( is_mask_bit_support(control) )
+        entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
+                is_64bit_address(control));
+    entry->dev = dev;
+    if ( entry->msi_attrib.maskbit )
+    {
+        unsigned int maskbits, temp;
+        /* All MSIs are unmasked by default, Mask them all */
+        maskbits = pci_conf_read32(bus, slot, func,
+                       msi_mask_bits_reg(pos, is_64bit_address(control)));
+        /* temp ends up with the low multi_msi_capable(control) bits set. */
+        temp = (1 << multi_msi_capable(control));
+        temp = ((temp - 1) & ~temp);
+        maskbits |= temp;
+        pci_conf_write32(bus, slot, func,
+            msi_mask_bits_reg(pos, is_64bit_address(control)),
+            maskbits);
+    }
+    list_add_tail(&entry->list, &dev->msi_list);
+
+    /* Configure MSI capability structure */
+    ret = setup_msi_irq(dev, entry);
+    if ( ret )
+    {
+        /*
+         * The descriptor could not be set up, so undo only what was done
+         * here.  msi_free_vector() is not usable on this path: it looks the
+         * descriptor up through irq_desc[vector].
+         */
+        list_del(&entry->list);
+        xfree(entry);
+        return ret;
+    }
+
+    /* Restore the original MSI enabled bits  */
+    pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
+
+    return 0;
+}
+
+/*
+ * Return the memory base address held in BAR @bar_index of @dev.
+ *
+ * The low four bits of a memory BAR are type/prefetch flags, not address
+ * bits, so they are masked off.  For a 64-bit BAR the register at
+ * @bar_index holds address bits 31:4 and the following register holds bits
+ * 63:32.  The result is built in a u64 so that the upper half survives on
+ * builds where unsigned long is 32 bits wide.
+ */
+static u64 pci_resource_start(struct pci_dev *dev, u8 bar_index)
+{
+    u32 bar_lo, bar_hi;
+    u64 bar_base;
+    u8 bus = dev->bus;
+    u8 slot = PCI_SLOT(dev->devfn);
+    u8 func = PCI_FUNC(dev->devfn);
+
+    bar_lo = pci_conf_read32(bus, slot, func,
+                             PCI_BASE_ADDRESS_0 + 4 * bar_index);
+    bar_base = bar_lo & PCI_BASE_ADDRESS_MEM_MASK;
+
+    if ( (bar_lo & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+         PCI_BASE_ADDRESS_MEM_TYPE_64 )
+    {
+        bar_hi = pci_conf_read32(bus, slot, func,
+                                 PCI_BASE_ADDRESS_0 + 4 * (bar_index + 1));
+        bar_base |= (u64)bar_hi << 32;
+    }
+
+    return bar_base;
+}
+
+/**
+ * msix_capability_init - configure device's MSI-X capability
+ * @dev: pointer to the pci_dev data structure of MSI-X device function
+ * @vector: interrupt vector the table entry should deliver
+ * @entry_nr: index of the MSI-X table entry to program
+ *
+ * Setup the MSI-X capability structure of device function with a
+ * single MSI-X irq. A return of zero indicates the successful setup of
+ * the requested MSI-X entry or non-zero for otherwise: -ENODEV when the
+ * function has no MSI-X capability, -ENOMEM when the descriptor or a
+ * fixmap page cannot be allocated, or the error from setup_msi_irq().
+ **/
+static int msix_capability_init(struct pci_dev *dev, int vector, int entry_nr)
+{
+    struct msi_desc *entry;
+    int pos, idx, ret;
+    u16 control;
+    u64 phys_addr;
+    u32 table_offset;
+    u8 bir;
+    void __iomem *base;
+    unsigned long virt;
+    u8 bus = dev->bus;
+    u8 slot = PCI_SLOT(dev->devfn);
+    u8 func = PCI_FUNC(dev->devfn);
+
+    pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
+    if ( !pos )
+        /* No MSI-X capability: do not touch config space at a bogus offset. */
+        return -ENODEV;
+
+    control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
+    msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
+
+    /* MSI-X Table Initialization */
+    entry = alloc_msi_entry();
+    if ( !entry )
+        return -ENOMEM;
+
+    /* Request & Map MSI-X table region */
+    table_offset = pci_conf_read32(bus, slot, func,
+                                   msix_table_offset_reg(pos));
+    bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
+    table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
+    /* Kept in a u64 so a table above 4GB is not truncated on 32-bit builds. */
+    phys_addr = pci_resource_start(dev, bir) + table_offset;
+    idx = msix_fixmap_alloc();
+    if ( idx < 0 )
+    {
+        xfree(entry);
+        return -ENOMEM;
+    }
+    set_fixmap_nocache(idx, phys_addr);
+    virt = fix_to_virt(idx);
+    /* The fixmap maps one page; add back the table's offset inside it. */
+    base = (void __iomem *)(virt +
+               (unsigned long)(phys_addr & ((1UL << PAGE_SHIFT) - 1)));
+
+    entry->msi_attrib.type = PCI_CAP_ID_MSIX;
+    entry->msi_attrib.is_64 = 1;
+    entry->msi_attrib.entry_nr = entry_nr;
+    entry->msi_attrib.maskbit = 1;
+    entry->msi_attrib.masked = 1;
+    entry->msi_attrib.pos = pos;
+    entry->vector = vector;
+    entry->dev = dev;
+    entry->mask_base = base;
+
+    list_add_tail(&entry->list, &dev->msi_list);
+
+    ret = setup_msi_irq(dev, entry);
+    if ( ret )
+    {
+        /* Undo the list insertion, the mapping and the fixmap reservation. */
+        list_del(&entry->list);
+        destroy_xen_mappings(virt, virt + PAGE_SIZE);
+        msix_fixmap_free(idx);
+        xfree(entry);
+        return ret;
+    }
+
+    /* Restore the original MSI-X control word (including the enable bit) */
+    pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
+
+    return 0;
+}
+
+/**
+ * __pci_enable_msi - configure device's MSI capability structure
+ * @bus: PCI bus number of the device function
+ * @devfn: encoded device/function number of the device function
+ * @vector: interrupt vector the MSI should deliver
+ *
+ * Setup the MSI capability structure of device function with
+ * a single MSI irq. The function is looked up in msi_pdev_list and a new
+ * pci_dev is allocated and registered there on first use. A return of
+ * zero indicates the successful setup of an entry zero with the new MSI
+ * irq (or that @vector was already mapped) or non-zero for otherwise.
+ **/
+static int __pci_enable_msi(u8 bus, u8 devfn, int vector)
+{
+    int status;
+    int new_dev = 0;
+    struct pci_dev *dev;
+
+    dev = get_msi_pdev(bus, devfn);
+    if ( !dev )
+    {
+        dev = xmalloc(struct pci_dev);
+        if ( !dev )
+            return -ENOMEM;
+        /* xmalloc() does not clear memory; do not leave stray fields. */
+        memset(dev, 0, sizeof(*dev));
+        dev->bus = bus;
+        dev->devfn = devfn;
+        INIT_LIST_HEAD(&dev->msi_list);
+        new_dev = 1;
+    }
+
+    /* A freshly allocated dev has an empty msi_list and never matches. */
+    if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSI) )
+    {
+        dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on "
+                "device %02x:%02x.%01x.\n", vector, bus,
+                PCI_SLOT(devfn), PCI_FUNC(devfn));
+        return 0;
+    }
+
+    status = msi_capability_init(dev, vector);
+
+    if ( new_dev )
+    {
+        if ( status && list_empty(&dev->msi_list) )
+        {
+            /* Nothing was set up: do not register an empty device. */
+            xfree(dev);
+            return status;
+        }
+
+        spin_lock(&msi_pdev_lock);
+        list_add_tail(&dev->msi_dev_list, &msi_pdev_list);
+        spin_unlock(&msi_pdev_lock);
+    }
+
+    return status;
+}
+
+/*
+ * Tear down the MSI set up for @vector.
+ *
+ * The MSI control word is saved, MSI is disabled, the descriptor is freed
+ * and the saved control word is then written back.  The descriptor in
+ * irq_desc[vector] is dereferenced without a NULL check.
+ */
+static void __pci_disable_msi(int vector)
+{
+    struct msi_desc *desc = irq_desc[vector].msi_desc;
+    struct pci_dev *pdev = desc->dev;
+    u8 bus = pdev->bus;
+    u8 slot = PCI_SLOT(pdev->devfn);
+    u8 func = PCI_FUNC(pdev->devfn);
+    int cap = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSI);
+    u16 saved = pci_conf_read16(bus, slot, func, msi_control_reg(cap));
+
+    msi_set_enable(pdev, 0);
+
+    BUG_ON(list_empty(&pdev->msi_list));
+
+    msi_free_vector(vector);
+
+    pci_conf_write16(bus, slot, func, msi_control_reg(cap), saved);
+}
+
+/**
+ * __pci_enable_msix - configure device's MSI-X capability structure
+ * @bus: PCI bus number of the device function
+ * @devfn: encoded device/function number of the device function
+ * @vector: interrupt vector the MSI-X entry should deliver
+ * @entry_nr: index of the MSI-X table entry to program
+ *
+ * Setup one entry of the MSI-X capability structure of device function.
+ * The function is looked up in msi_pdev_list and a new pci_dev is
+ * allocated and registered there on first use. A return of zero
+ * indicates the successful configuration of the entry (or that @vector
+ * was already mapped). A return of < 0 indicates a failure: -ENODEV when
+ * the function has no MSI-X capability, -EINVAL when @entry_nr is outside
+ * the device's table, -ENOMEM on allocation failure.
+ **/
+static int __pci_enable_msix(u8 bus, u8 devfn, int vector, int entry_nr)
+{
+    int status, pos, nr_entries;
+    int new_dev = 0;
+    struct pci_dev *dev;
+    u16 control;
+    u8 slot = PCI_SLOT(devfn);
+    u8 func = PCI_FUNC(devfn);
+
+    pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
+    if ( !pos )
+        return -ENODEV;
+
+    control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
+    /*
+     * multi_msix_capable() is used as the number of table entries, so valid
+     * indices are 0 .. nr_entries - 1.
+     */
+    nr_entries = multi_msix_capable(control);
+    if ( entry_nr < 0 || entry_nr >= nr_entries )
+        return -EINVAL;
+
+    /* Check whether driver already requested for MSI-X irqs */
+    dev = get_msi_pdev(bus, devfn);
+
+    if ( !dev )
+    {
+        dev = xmalloc(struct pci_dev);
+        if ( !dev )
+            return -ENOMEM;
+        /* xmalloc() does not clear memory; do not leave stray fields. */
+        memset(dev, 0, sizeof(*dev));
+        dev->bus = bus;
+        dev->devfn = devfn;
+        INIT_LIST_HEAD(&dev->msi_list);
+        new_dev = 1;
+    }
+
+    /* A freshly allocated dev has an empty msi_list and never matches. */
+    if ( find_msi_entry(dev, vector, PCI_CAP_ID_MSIX) )
+    {
+        dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on "
+                "device %02x:%02x.%01x.\n", vector, bus,
+                PCI_SLOT(devfn), PCI_FUNC(devfn));
+        return 0;
+    }
+
+    status = msix_capability_init(dev, vector, entry_nr);
+
+    if ( new_dev )
+    {
+        if ( status && list_empty(&dev->msi_list) )
+        {
+            /* Nothing was set up: do not register an empty device. */
+            xfree(dev);
+            return status;
+        }
+
+        spin_lock(&msi_pdev_lock);
+        list_add_tail(&dev->msi_dev_list, &msi_pdev_list);
+        spin_unlock(&msi_pdev_lock);
+    }
+
+    return status;
+}
+
+/*
+ * Tear down the MSI-X entry set up for @vector.
+ *
+ * The MSI-X control word is saved, MSI-X is disabled, the descriptor is
+ * freed and the saved control word is then written back.  The descriptor in
+ * irq_desc[vector] is dereferenced without a NULL check.
+ */
+static void __pci_disable_msix(int vector)
+{
+    struct msi_desc *entry;
+    struct pci_dev *dev;
+    int pos;
+    u16 control;
+    u8 bus, slot, func;
+
+    entry = irq_desc[vector].msi_desc;
+    dev = entry->dev;
+    bus = dev->bus;
+    slot = PCI_SLOT(dev->devfn);
+    func = PCI_FUNC(dev->devfn);
+
+    pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
+    control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
+    /* This is the MSI-X path: clear the MSI-X enable bit, not the MSI one. */
+    msix_set_enable(dev, 0);
+
+    BUG_ON(list_empty(&dev->msi_list));
+
+    msi_free_vector(vector);
+
+    pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
+}
+
+/*
+ * Enable MSI (@msi != 0) or MSI-X (@msi == 0) delivery of @vector for the
+ * device function at @bus/@devfn.  @entry_nr is used for MSI-X only.
+ * Returns the result of the selected helper.
+ */
+int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi)
+{
+    return msi ? __pci_enable_msi(bus, devfn, vector)
+               : __pci_enable_msix(bus, devfn, vector, entry_nr);
+}
+
+/*
+ * Tear down the MSI or MSI-X setup for @vector, chosen by the capability
+ * type stored in the vector's descriptor.  Other types are ignored.  The
+ * descriptor is dereferenced without a NULL check.
+ */
+void pci_disable_msi(int vector)
+{
+    switch ( irq_desc[vector].msi_desc->msi_attrib.type )
+    {
+    case PCI_CAP_ID_MSI:
+        __pci_disable_msi(vector);
+        break;
+    case PCI_CAP_ID_MSIX:
+        __pci_disable_msix(vector);
+        break;
+    default:
+        break;
+    }
+}
+
+/* Free every MSI/MSI-X descriptor of @dev; forwards to msi_free_vectors(). */
+void pci_cleanup_msi(struct pci_dev *dev)
+{
+    msi_free_vectors(dev);
+}
+
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Thu May 01 10:30:22 2008 +0100
+++ b/xen/arch/x86/physdev.c    Thu May 01 10:31:29 2008 +0100
@@ -9,6 +9,7 @@
 #include <xen/guest_access.h>
 #include <xen/iocap.h>
 #include <asm/current.h>
+#include <asm/msi.h>
 #include <asm/hypercall.h>
 #include <public/xen.h>
 #include <public/physdev.h>
@@ -25,6 +26,9 @@ ioapic_guest_write(
 ioapic_guest_write(
     unsigned long physbase, unsigned int reg, u32 pval);
 
+
+extern struct hw_interrupt_type pci_msi_type;
+
 static int get_free_pirq(struct domain *d, int type, int index)
 {
     int i;
@@ -57,7 +61,8 @@ static int get_free_pirq(struct domain *
 /*
  * Caller hold the irq_lock
  */
-static int map_domain_pirq(struct domain *d, int pirq, int vector, int type)
+static int map_domain_pirq(struct domain *d, int pirq, int vector,
+                           struct physdev_map_pirq *map)
 {
     int ret = 0;
     int old_vector, old_pirq;
@@ -95,6 +100,28 @@ static int map_domain_pirq(struct domain
         gdprintk(XENLOG_G_ERR, "add irq permit access %x failed\n", pirq);
         ret = -EINVAL;
         goto done;
+    }
+
+    if ( map && MAP_PIRQ_TYPE_MSI == map->type )
+    {
+        irq_desc_t         *desc;
+        unsigned long flags;
+
+        desc = &irq_desc[vector];
+
+        spin_lock_irqsave(&desc->lock, flags);
+        if ( desc->handler != &no_irq_type )
+            gdprintk(XENLOG_G_ERR, "Map vector %x to msi while it is in use\n",
+                     vector);
+        desc->handler = &pci_msi_type;
+        spin_unlock_irqrestore(&desc->lock, flags);
+
+        ret = pci_enable_msi(map->msi_info.bus,
+                                    map->msi_info.devfn, vector,
+                                                        map->msi_info.entry_nr,
+                                                        map->msi_info.msi);
+        if ( ret )
+            goto done;
     }
 
     d->arch.pirq_vector[pirq] = vector;
@@ -129,7 +156,26 @@ static int unmap_domain_pirq(struct doma
         ret = -EINVAL;
     }
     else
+    {
+        unsigned long flags;
+        irq_desc_t *desc;
+
+        desc = &irq_desc[vector];
+        if ( desc->msi_desc )
+            pci_disable_msi(vector);
+
+        spin_lock_irqsave(&desc->lock, flags);
+        if ( desc->handler == &pci_msi_type )
+        {
+            /* MSI is not shared, so should be released already */
+            BUG_ON(desc->status & IRQ_GUEST);
+            irq_desc[vector].handler = &no_irq_type;
+        }
+        spin_unlock_irqrestore(&desc->lock, flags);
+
         d->arch.pirq_vector[pirq] = d->arch.vector_pirq[vector] = 0;
+    }
+
     ret = irq_deny_access(d, pirq);
 
     if ( ret )
@@ -187,6 +233,9 @@ static int physdev_map_pirq(struct physd
             break;
         case MAP_PIRQ_TYPE_MSI:
             vector = map->index;
+                       if ( vector == -1 )
+                               vector = assign_irq_vector(AUTO_ASSIGN);
+
             if ( vector < 0 || vector >= NR_VECTORS )
             {
                 ret = -EINVAL;
@@ -237,7 +286,8 @@ static int physdev_map_pirq(struct physd
             pirq = map->pirq;
     }
 
-    ret = map_domain_pirq(d, pirq, vector, map->type);
+
+    ret = map_domain_pirq(d, pirq, vector, map);
 
     if ( !ret )
         map->pirq = pirq;
@@ -331,6 +381,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
             break;
 
         ret = physdev_map_pirq(&map);
+
         if ( copy_to_guest(arg, &map, 1) != 0 )
             ret = -EFAULT;
         break;
@@ -397,7 +448,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
 
         irq = irq_op.irq;
         ret = -EINVAL;
-        if ( (irq < 0) || (irq >= NR_IRQS) )
+        if ( ((irq < 0) && (irq != AUTO_ASSIGN)) || (irq >= NR_IRQS) )
             break;
 
         irq_op.vector = assign_irq_vector(irq);
@@ -408,8 +459,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         {
             spin_lock_irqsave(&dom0->arch.irq_lock, flags);
             if ( irq != AUTO_ASSIGN )
-                ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector,
-                                     MAP_PIRQ_TYPE_GSI);
+                ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, NULL);
             spin_unlock_irqrestore(&dom0->arch.irq_lock, flags);
         }
 
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c  Thu May 01 10:30:22 2008 +0100
+++ b/xen/drivers/passthrough/amd/iommu_init.c  Thu May 01 10:31:29 2008 +0100
@@ -23,6 +23,7 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <asm/amd-iommu.h>
+#include <asm/msi.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
 #include <asm-x86/fixmap.h>
 
@@ -292,7 +293,7 @@ static void amd_iommu_msi_addr_init(stru
     u32 address_lo = MSI_ADDR_HEADER |
             MSI_ADDR_DESTMODE_PHYS |
             MSI_ADDR_REDIRECTION_CPU |
-            MSI_ADDR_DESTID_CPU(phy_cpu);
+            MSI_ADDR_DEST_ID(phy_cpu);
 
     pci_conf_write32(bus, dev, func,
         iommu->msi_cap + PCI_MSI_ADDRESS_LO, address_lo);
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c    Thu May 01 10:30:22 2008 +0100
+++ b/xen/drivers/passthrough/vtd/intremap.c    Thu May 01 10:31:29 2008 +0100
@@ -24,10 +24,10 @@
 #include <xen/time.h>
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
+#include <asm/msi.h>
 #include "iommu.h"
 #include "dmar.h"
 #include "vtd.h"
-#include "msi.h"
 #include "extern.h"
 
 u16 apicid_to_bdf(int apic_id)
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Thu May 01 10:30:22 2008 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c       Thu May 01 10:31:29 2008 +0100
@@ -29,9 +29,9 @@
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
 #include <asm/paging.h>
+#include <asm/msi.h>
 #include "iommu.h"
 #include "dmar.h"
-#include "msi.h"
 #include "extern.h"
 #include "vtd.h"
 
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/drivers/passthrough/vtd/msi.h
--- a/xen/drivers/passthrough/vtd/msi.h Thu May 01 10:30:22 2008 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-/*
- * Copyright (C) 2003-2004 Intel
- * Copyright (C) Tom Long Nguyen (tom.l.nguyen@xxxxxxxxx)
- */
-
-#ifndef MSI_H
-#define MSI_H
-
-/*
- * Assume the maximum number of hot plug slots supported by the system is about
- * ten. The worstcase is that each of these slots is hot-added with a device,
- * which has two MSI/MSI-X capable functions. To avoid any MSI-X driver, which
- * attempts to request all available vectors, NR_HP_RESERVED_VECTORS is defined
- * as below to ensure at least one message is assigned to each detected MSI/
- * MSI-X device function.
- */
-#define NR_HP_RESERVED_VECTORS         20
-
-extern int vector_irq[NR_VECTORS];
-extern int pci_vector_resources(int last, int nr_released);
-
-/*
- * MSI-X Address Register
- */
-#define PCI_MSIX_FLAGS_QSIZE           0x7FF
-#define PCI_MSIX_FLAGS_ENABLE          (1 << 15)
-#define PCI_MSIX_FLAGS_BIRMASK         (7 << 0)
-#define PCI_MSIX_FLAGS_BITMASK         (1 << 0)
-
-#define PCI_MSIX_ENTRY_SIZE                    16
-#define  PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET      0
-#define  PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET      4
-#define  PCI_MSIX_ENTRY_DATA_OFFSET            8
-#define  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET     12
-
-#define msi_control_reg(base)          (base + PCI_MSI_FLAGS)
-#define msi_lower_address_reg(base)    (base + PCI_MSI_ADDRESS_LO)
-#define msi_upper_address_reg(base)    (base + PCI_MSI_ADDRESS_HI)
-#define msi_data_reg(base, is64bit)    \
-       ( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )
-#define msi_mask_bits_reg(base, is64bit) \
-       ( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
-#define msi_disable(control)           control &= ~PCI_MSI_FLAGS_ENABLE
-#define multi_msi_capable(control) \
-       (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
-#define multi_msi_enable(control, num) \
-       control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
-#define is_64bit_address(control)      (control & PCI_MSI_FLAGS_64BIT)
-#define is_mask_bit_support(control)   (control & PCI_MSI_FLAGS_MASKBIT)
-#define msi_enable(control, num) multi_msi_enable(control, num); \
-       control |= PCI_MSI_FLAGS_ENABLE
-
-#define msix_table_offset_reg(base)    (base + 0x04)
-#define msix_pba_offset_reg(base)      (base + 0x08)
-#define msix_enable(control)           control |= PCI_MSIX_FLAGS_ENABLE
-#define msix_disable(control)          control &= ~PCI_MSIX_FLAGS_ENABLE
-#define msix_table_size(control)       ((control & PCI_MSIX_FLAGS_QSIZE)+1)
-#define multi_msix_capable             msix_table_size
-#define msix_unmask(address)           (address & ~PCI_MSIX_FLAGS_BITMASK)
-#define msix_mask(address)             (address | PCI_MSIX_FLAGS_BITMASK)
-#define msix_is_pending(address)       (address & PCI_MSIX_FLAGS_PENDMASK)
-
-/*
- * MSI Defined Data Structures
- */
-#define MSI_ADDRESS_HEADER             0xfee
-#define MSI_ADDRESS_HEADER_SHIFT       12
-#define MSI_ADDRESS_HEADER_MASK                0xfff000
-#define MSI_ADDRESS_DEST_ID_MASK       0xfff0000f
-#define MSI_TARGET_CPU_MASK            0xff
-#define MSI_TARGET_CPU_SHIFT           12
-#define MSI_DELIVERY_MODE              0
-#define MSI_LEVEL_MODE                 1       /* Edge always assert */
-#define MSI_TRIGGER_MODE               0       /* MSI is edge sensitive */
-#define MSI_PHYSICAL_MODE              0
-#define MSI_LOGICAL_MODE               1
-#define MSI_REDIRECTION_HINT_MODE      0
-
-#define __LITTLE_ENDIAN_BITFIELD       1
-
-struct msg_data {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-       __u32   vector          :  8;
-       __u32   delivery_mode   :  3;   /* 000b: FIXED | 001b: lowest prior */
-       __u32   reserved_1      :  3;
-       __u32   level           :  1;   /* 0: deassert | 1: assert */
-       __u32   trigger         :  1;   /* 0: edge | 1: level */
-       __u32   reserved_2      : 16;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-       __u32   reserved_2      : 16;
-       __u32   trigger         :  1;   /* 0: edge | 1: level */
-       __u32   level           :  1;   /* 0: deassert | 1: assert */
-       __u32   reserved_1      :  3;
-       __u32   delivery_mode   :  3;   /* 000b: FIXED | 001b: lowest prior */
-       __u32   vector          :  8;
-#else
-#error "Bitfield endianness not defined! Check your byteorder.h"
-#endif
-} __attribute__ ((packed));
-
-struct msg_address {
-       union {
-               struct {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-                       __u32   reserved_1      :  2;
-                       __u32   dest_mode       :  1;   /*0:physic | 1:logic */
-                       __u32   redirection_hint:  1;   /*0: dedicated CPU
-                                                         1: lowest priority */
-                       __u32   reserved_2      :  4;
-                       __u32   dest_id         : 24;   /* Destination ID */
-#elif defined(__BIG_ENDIAN_BITFIELD)
-                       __u32   dest_id         : 24;   /* Destination ID */
-                       __u32   reserved_2      :  4;
-                       __u32   redirection_hint:  1;   /*0: dedicated CPU
-                                                         1: lowest priority */
-                       __u32   dest_mode       :  1;   /*0:physic | 1:logic */
-                       __u32   reserved_1      :  2;
-#else
-#error "Bitfield endianness not defined! Check your byteorder.h"
-#endif
-               }u;
-                       __u32  value;
-       }lo_address;
-       __u32   hi_address;
-} __attribute__ ((packed));
-
-#endif /* MSI_H */
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c      Thu May 01 10:30:22 2008 +0100
+++ b/xen/drivers/passthrough/vtd/qinval.c      Thu May 01 10:31:29 2008 +0100
@@ -24,10 +24,10 @@
 #include <xen/time.h>
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
+#include <asm/msi.h>
 #include "iommu.h"
 #include "dmar.h"
 #include "vtd.h"
-#include "msi.h"
 #include "extern.h"
 
 static void print_qi_regs(struct iommu *iommu)
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/drivers/passthrough/vtd/utils.c
--- a/xen/drivers/passthrough/vtd/utils.c       Thu May 01 10:30:22 2008 +0100
+++ b/xen/drivers/passthrough/vtd/utils.c       Thu May 01 10:31:29 2008 +0100
@@ -23,9 +23,9 @@
 #include <xen/time.h>
 #include <xen/pci.h>
 #include <xen/pci_regs.h>
+#include <asm/msi.h>
 #include "iommu.h"
 #include "dmar.h"
-#include "msi.h"
 #include "vtd.h"
 
 #define INTEL   0x8086
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/include/asm-x86/fixmap.h
--- a/xen/include/asm-x86/fixmap.h      Thu May 01 10:30:22 2008 +0100
+++ b/xen/include/asm-x86/fixmap.h      Thu May 01 10:31:29 2008 +0100
@@ -19,6 +19,7 @@
 #include <xen/kexec.h>
 #include <xen/iommu.h>
 #include <asm/amd-iommu.h>
+#include <asm/msi.h>
 
 /*
  * Here we define all the compile-time 'special' virtual
@@ -47,6 +48,8 @@ enum fixed_addresses {
     FIX_IOMMU_MMIO_BASE_0,
     FIX_IOMMU_MMIO_END = FIX_IOMMU_MMIO_BASE_0 + IOMMU_PAGES -1,
     FIX_TBOOT_SHARED_BASE,
+    FIX_MSIX_IO_RESERV_BASE,
+    FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + MAX_MSIX_PAGES -1,
     __end_of_fixed_addresses
 };
 
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h      Thu May 01 10:30:22 2008 +0100
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h      Thu May 01 10:31:29 2008 +0100
@@ -435,33 +435,4 @@
 #define IOMMU_IO_READ_ENABLED           1
 #define HACK_BIOS_SETTINGS                  0
 
-/* MSI interrupt */
-#define MSI_DATA_VECTOR_SHIFT       0
-#define MSI_DATA_VECTOR(v)      (((u8)v) << MSI_DATA_VECTOR_SHIFT)
-
-#define MSI_DATA_DELIVERY_SHIFT     8
-#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_SHIFT)
-#define MSI_DATA_DELIVERY_LOWPRI    (1 << MSI_DATA_DELIVERY_SHIFT)
-
-#define MSI_DATA_LEVEL_SHIFT        14
-#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT)
-#define MSI_DATA_LEVEL_ASSERT   (1 << MSI_DATA_LEVEL_SHIFT)
-
-#define MSI_DATA_TRIGGER_SHIFT      15
-#define MSI_DATA_TRIGGER_EDGE   (0 << MSI_DATA_TRIGGER_SHIFT)
-#define  MSI_DATA_TRIGGER_LEVEL  (1 << MSI_DATA_TRIGGER_SHIFT)
-
-#define MSI_TARGET_CPU_SHIFT        12
-#define MSI_ADDR_HEADER         0xfee00000
-#define MSI_ADDR_DESTID_MASK        0xfff0000f
-#define MSI_ADDR_DESTID_CPU(cpu)    ((cpu) << MSI_TARGET_CPU_SHIFT)
-
-#define MSI_ADDR_DESTMODE_SHIFT     2
-#define MSI_ADDR_DESTMODE_PHYS  (0 << MSI_ADDR_DESTMODE_SHIFT)
-#define MSI_ADDR_DESTMODE_LOGIC (1 << MSI_ADDR_DESTMODE_SHIFT)
-
-#define MSI_ADDR_REDIRECTION_SHIFT  3
-#define MSI_ADDR_REDIRECTION_CPU    (0 << MSI_ADDR_REDIRECTION_SHIFT)
-#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT)
-
 #endif /* _ASM_X86_64_AMD_IOMMU_DEFS_H */
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/include/asm-x86/msi.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/msi.h Thu May 01 10:31:29 2008 +0100
@@ -0,0 +1,210 @@
+#ifndef __ASM_MSI_H
+#define __ASM_MSI_H
+
+#include <xen/cpumask.h>
+#include <asm/irq.h>
+/*
+ * Constants for Intel APIC based MSI messages.
+ */
+
+/*
+ * Shifts for MSI data
+ */
+
+#define MSI_DATA_VECTOR_SHIFT          0
+#define  MSI_DATA_VECTOR_MASK          0x000000ff
+#define         MSI_DATA_VECTOR(v)             (((v) << MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK)
+
+#define MSI_DATA_DELIVERY_MODE_SHIFT   8
+#define  MSI_DATA_DELIVERY_FIXED       (0 << MSI_DATA_DELIVERY_MODE_SHIFT)
+#define  MSI_DATA_DELIVERY_LOWPRI      (1 << MSI_DATA_DELIVERY_MODE_SHIFT)
+
+#define MSI_DATA_LEVEL_SHIFT           14
+#define         MSI_DATA_LEVEL_DEASSERT        (0 << MSI_DATA_LEVEL_SHIFT)
+#define         MSI_DATA_LEVEL_ASSERT          (1 << MSI_DATA_LEVEL_SHIFT)
+
+#define MSI_DATA_TRIGGER_SHIFT         15
+#define  MSI_DATA_TRIGGER_EDGE         (0 << MSI_DATA_TRIGGER_SHIFT)
+#define  MSI_DATA_TRIGGER_LEVEL                (1 << MSI_DATA_TRIGGER_SHIFT)
+
+/*
+ * Shift/mask fields for msi address
+ */
+
+#define MSI_ADDR_BASE_HI               0
+#define MSI_ADDR_BASE_LO               0xfee00000
+#define MSI_ADDR_HEADER             MSI_ADDR_BASE_LO
+
+#define MSI_ADDR_DESTMODE_SHIFT     2
+#define MSI_ADDR_DESTMODE_PHYS      (0 << MSI_ADDR_DESTMODE_SHIFT)
+#define MSI_ADDR_DESTMODE_LOGIC     (1 << MSI_ADDR_DESTMODE_SHIFT)
+
+#define MSI_ADDR_REDIRECTION_SHIFT  3
+#define MSI_ADDR_REDIRECTION_CPU    (0 << MSI_ADDR_REDIRECTION_SHIFT)
+#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT)
+
+#define MSI_ADDR_DEST_ID_SHIFT         12
+#define         MSI_ADDR_DEST_ID_MASK          0x00ffff0
+#define  MSI_ADDR_DEST_ID(dest)                (((dest) << MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK)
+
+/* MAX fixed pages reserved for mapping MSIX tables. */
+#if defined(__x86_64__)
+#define MAX_MSIX_PAGES              512
+#else
+#define MAX_MSIX_PAGES              32
+#endif
+
+struct msi_msg {
+       u32     address_lo;     /* low 32 bits of msi message address */
+       u32     address_hi;     /* high 32 bits of msi message address */
+       u32     data;           /* 16 bits of msi message data */
+};
+
+/* Helper functions */
+extern void mask_msi_irq(unsigned int irq);
+extern void unmask_msi_irq(unsigned int irq);
+extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
+extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
+extern void set_msi_irq_affinity(unsigned int irq, cpumask_t mask);
+extern int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi);
+extern void pci_disable_msi(int vector);
+
+struct msi_desc {
+       struct {
+               __u8    type    : 5;    /* {0: unused, 5h:MSI, 11h:MSI-X} */
+               __u8    maskbit : 1;    /* mask-pending bit supported ?   */
+               __u8    masked  : 1;
+               __u8    is_64   : 1;    /* Address size: 0=32bit 1=64bit  */
+               __u8    pos;            /* Location of the msi capability */
+               __u16   entry_nr;       /* specific enabled entry         */
+       }msi_attrib;
+
+       struct list_head list;
+
+       void __iomem *mask_base;
+       struct pci_dev *dev;
+    int vector;
+
+       /* Last set MSI message */
+       struct msi_msg msg;
+};
+
+/*
+ * Assume the maximum number of hot plug slots supported by the system is about
+ * ten. The worst case is that each of these slots is hot-added with a device
+ * that has two MSI/MSI-X capable functions. To guard against an MSI-X driver
+ * that requests all available vectors, NR_HP_RESERVED_VECTORS is defined as
+ * below to ensure at least one message is assigned to each detected MSI/MSI-X
+ * device function.
+ */
+#define NR_HP_RESERVED_VECTORS         20
+
+extern int vector_irq[NR_VECTORS];
+
+/*
+ * MSI-X Address Register
+ */
+#define PCI_MSIX_FLAGS_QSIZE           0x7FF
+#define PCI_MSIX_FLAGS_ENABLE          (1 << 15)
+#define PCI_MSIX_FLAGS_BIRMASK         (7 << 0)
+#define PCI_MSIX_FLAGS_BITMASK         (1 << 0)
+
+#define PCI_MSIX_ENTRY_SIZE                    16
+#define  PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET      0
+#define  PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET      4
+#define  PCI_MSIX_ENTRY_DATA_OFFSET            8
+#define  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET     12
+
+#define msi_control_reg(base)          (base + PCI_MSI_FLAGS)
+#define msi_lower_address_reg(base)    (base + PCI_MSI_ADDRESS_LO)
+#define msi_upper_address_reg(base)    (base + PCI_MSI_ADDRESS_HI)
+#define msi_data_reg(base, is64bit)    \
+       ( (is64bit == 1) ? base+PCI_MSI_DATA_64 : base+PCI_MSI_DATA_32 )
+#define msi_mask_bits_reg(base, is64bit) \
+       ( (is64bit == 1) ? base+PCI_MSI_MASK_BIT : base+PCI_MSI_MASK_BIT-4)
+#define msi_disable(control)           control &= ~PCI_MSI_FLAGS_ENABLE
+#define multi_msi_capable(control) \
+       (1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
+#define multi_msi_enable(control, num) \
+       control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
+#define is_64bit_address(control)      (!!(control & PCI_MSI_FLAGS_64BIT))
+#define is_mask_bit_support(control)   (!!(control & PCI_MSI_FLAGS_MASKBIT))
+#define msi_enable(control, num) multi_msi_enable(control, num); \
+       control |= PCI_MSI_FLAGS_ENABLE
+
+#define msix_control_reg(base)         (base + PCI_MSIX_FLAGS)
+#define msix_table_offset_reg(base)    (base + 0x04)
+#define msix_pba_offset_reg(base)      (base + 0x08)
+#define msix_enable(control)           control |= PCI_MSIX_FLAGS_ENABLE
+#define msix_disable(control)          control &= ~PCI_MSIX_FLAGS_ENABLE
+#define msix_table_size(control)       ((control & PCI_MSIX_FLAGS_QSIZE)+1)
+#define multi_msix_capable             msix_table_size
+#define msix_unmask(address)           (address & ~PCI_MSIX_FLAGS_BITMASK)
+#define msix_mask(address)             (address | PCI_MSIX_FLAGS_BITMASK)
+#define msix_is_pending(address)       (address & PCI_MSIX_FLAGS_PENDMASK)
+
+/*
+ * MSI Defined Data Structures
+ */
+#define MSI_ADDRESS_HEADER             0xfee
+#define MSI_ADDRESS_HEADER_SHIFT       12
+#define MSI_ADDRESS_HEADER_MASK                0xfff000
+#define MSI_ADDRESS_DEST_ID_MASK       0xfff0000f
+#define MSI_TARGET_CPU_MASK            0xff
+#define MSI_TARGET_CPU_SHIFT           12
+#define MSI_DELIVERY_MODE              0
+#define MSI_LEVEL_MODE                 1       /* Edge always assert */
+#define MSI_TRIGGER_MODE               0       /* MSI is edge sensitive */
+#define MSI_PHYSICAL_MODE              0
+#define MSI_LOGICAL_MODE               1
+#define MSI_REDIRECTION_HINT_MODE      0
+
+#define __LITTLE_ENDIAN_BITFIELD       1
+
+struct msg_data {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u32   vector          :  8;
+       __u32   delivery_mode   :  3;   /* 000b: FIXED | 001b: lowest prior */
+       __u32   reserved_1      :  3;
+       __u32   level           :  1;   /* 0: deassert | 1: assert */
+       __u32   trigger         :  1;   /* 0: edge | 1: level */
+       __u32   reserved_2      : 16;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       __u32   reserved_2      : 16;
+       __u32   trigger         :  1;   /* 0: edge | 1: level */
+       __u32   level           :  1;   /* 0: deassert | 1: assert */
+       __u32   reserved_1      :  3;
+       __u32   delivery_mode   :  3;   /* 000b: FIXED | 001b: lowest prior */
+       __u32   vector          :  8;
+#else
+#error "Bitfield endianness not defined! Check your byteorder.h"
+#endif
+} __attribute__ ((packed));
+
+struct msg_address {
+       union {
+               struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+                       __u32   reserved_1      :  2;
+                       __u32   dest_mode       :  1;   /* 0: physical | 1: logical */
+                       __u32   redirection_hint:  1;   /*0: dedicated CPU
+                                                         1: lowest priority */
+                       __u32   reserved_2      :  4;
+                       __u32   dest_id         : 24;   /* Destination ID */
+#elif defined(__BIG_ENDIAN_BITFIELD)
+                       __u32   dest_id         : 24;   /* Destination ID */
+                       __u32   reserved_2      :  4;
+                       __u32   redirection_hint:  1;   /*0: dedicated CPU
+                                                         1: lowest priority */
+                       __u32   dest_mode       :  1;   /* 0: physical | 1: logical */
+                       __u32   reserved_1      :  2;
+#else
+#error "Bitfield endianness not defined! Check your byteorder.h"
+#endif
+               }u;
+                       __u32  value;
+       }lo_address;
+       __u32   hi_address;
+} __attribute__ ((packed));
+
+#endif /* __ASM_MSI_H */
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/include/public/physdev.h
--- a/xen/include/public/physdev.h      Thu May 01 10:30:22 2008 +0100
+++ b/xen/include/public/physdev.h      Thu May 01 10:31:29 2008 +0100
@@ -135,6 +135,11 @@ struct physdev_map_pirq {
     int index;
     /* IN or OUT */
     int pirq;
+    /* IN */
+    struct {
+        int bus, devfn, entry_nr;
+               int msi;  /* 0 - MSIX    1 - MSI */
+    } msi_info;
 };
 typedef struct physdev_map_pirq physdev_map_pirq_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t);
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Thu May 01 10:30:22 2008 +0100
+++ b/xen/include/xen/iommu.h   Thu May 01 10:31:29 2008 +0100
@@ -23,6 +23,7 @@
 #include <xen/init.h>
 #include <xen/pci.h>
 #include <xen/spinlock.h>
+#include <xen/pci.h>
 #include <public/hvm/ioreq.h>
 #include <public/domctl.h>
 
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/include/xen/irq.h
--- a/xen/include/xen/irq.h     Thu May 01 10:30:22 2008 +0100
+++ b/xen/include/xen/irq.h     Thu May 01 10:31:29 2008 +0100
@@ -44,6 +44,7 @@ typedef struct hw_interrupt_type hw_irq_
 
 #include <asm/irq.h>
 
+struct msi_desc;
 /*
  * This is the "IRQ descriptor", which contains various information
  * about the irq, including what kind of hardware handling it has,
@@ -54,6 +55,7 @@ typedef struct {
 typedef struct {
     unsigned int status;               /* IRQ status */
     hw_irq_controller *handler;
+    struct msi_desc   *msi_desc;
     struct irqaction *action;  /* IRQ action list */
     unsigned int depth;                /* nested irq disables */
     spinlock_t lock;
diff -r 6ecbb00e58cd -r 86c0353f19d0 xen/include/xen/pci.h
--- a/xen/include/xen/pci.h     Thu May 01 10:30:22 2008 +0100
+++ b/xen/include/xen/pci.h     Thu May 01 10:31:29 2008 +0100
@@ -26,8 +26,10 @@
 
 struct pci_dev {
     struct list_head list;
+    struct list_head msi_dev_list;
     u8 bus;
     u8 devfn;
+    struct list_head msi_list;
 };
 
 uint8_t pci_conf_read8(

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.