[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] vpci/msix: add MSI-X handlers



commit d6281be9d01456a5989daa4eb2eccd718d73857d
Author:     Roger Pau Monne <roger.pau@xxxxxxxxxx>
AuthorDate: Thu Mar 22 15:00:00 2018 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri Mar 23 10:26:06 2018 +0100

    vpci/msix: add MSI-X handlers
    
    Add handlers for accesses to the MSI-X message control field on the
    PCI configuration space, and traps for accesses to the memory region
    that contains the MSI-X table and PBA. This traps detect attempts from
    the guest to configure MSI-X interrupts and properly sets them up.
    
    Note that accesses to the Table Offset, Table BIR, PBA Offset and PBA
    BIR are not trapped by Xen at the moment.
    
    Finally, turn the panic in the Dom0 PVH builder into a warning.
    
    Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
    [IO]
    Reviewed-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
---
 xen/arch/x86/hvm/dom0_build.c    |   2 +-
 xen/arch/x86/hvm/hvm.c           |   1 +
 xen/arch/x86/hvm/vmsi.c          | 160 +++++++++++---
 xen/drivers/vpci/Makefile        |   2 +-
 xen/drivers/vpci/header.c        |  19 ++
 xen/drivers/vpci/msi.c           |  27 ++-
 xen/drivers/vpci/msix.c          | 458 +++++++++++++++++++++++++++++++++++++++
 xen/drivers/vpci/vpci.c          |   1 +
 xen/include/asm-x86/hvm/domain.h |   3 +
 xen/include/asm-x86/hvm/io.h     |   5 +
 xen/include/xen/vpci.h           |  73 +++++++
 11 files changed, 720 insertions(+), 31 deletions(-)

diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c
index 259814d95d..d3f65eadbe 100644
--- a/xen/arch/x86/hvm/dom0_build.c
+++ b/xen/arch/x86/hvm/dom0_build.c
@@ -1117,7 +1117,7 @@ int __init dom0_construct_pvh(struct domain *d, const 
module_t *image,
 
     pvh_setup_mmcfg(d);
 
-    panic("Building a PVHv2 Dom0 is not yet supported.");
+    printk("WARNING: PVH is an experimental mode with limited 
functionality\n");
     return 0;
 }
 
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 346e11f2d6..569b124603 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -588,6 +588,7 @@ int hvm_domain_initialise(struct domain *d)
     INIT_LIST_HEAD(&d->arch.hvm_domain.write_map.list);
     INIT_LIST_HEAD(&d->arch.hvm_domain.g2m_ioport_list);
     INIT_LIST_HEAD(&d->arch.hvm_domain.mmcfg_regions);
+    INIT_LIST_HEAD(&d->arch.hvm_domain.msix_tables);
 
     rc = create_perdomain_mapping(d, PERDOMAIN_VIRT_START, 0, NULL, NULL);
     if ( rc )
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index be59c56d43..c31d27c389 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -30,6 +30,7 @@
 #include <xen/lib.h>
 #include <xen/errno.h>
 #include <xen/sched.h>
+#include <xen/softirq.h>
 #include <xen/irq.h>
 #include <xen/vpci.h>
 #include <public/hvm/ioreq.h>
@@ -644,13 +645,10 @@ static unsigned int msi_gflags(uint16_t data, uint64_t 
addr, bool masked)
            (masked ? 0 : XEN_DOMCTL_VMSI_X86_UNMASKED);
 }
 
-void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev,
-                        unsigned int entry, bool mask)
+static void vpci_mask_pirq(struct domain *d, int pirq, bool mask)
 {
     unsigned long flags;
-    struct irq_desc *desc = domain_spin_lock_irq_desc(pdev->domain,
-                                                      msi->arch.pirq + entry,
-                                                      &flags);
+    struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
 
     if ( !desc )
         return;
@@ -658,23 +656,31 @@ void vpci_msi_arch_mask(struct vpci_msi *msi, const 
struct pci_dev *pdev,
     spin_unlock_irqrestore(&desc->lock, flags);
 }
 
-int vpci_msi_arch_enable(struct vpci_msi *msi, const struct pci_dev *pdev,
-                         unsigned int vectors)
+void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev,
+                        unsigned int entry, bool mask)
+{
+    vpci_mask_pirq(pdev->domain, msi->arch.pirq + entry, mask);
+}
+
+static int vpci_msi_enable(const struct pci_dev *pdev, uint32_t data,
+                           uint64_t address, unsigned int nr,
+                           paddr_t table_base, uint32_t mask)
 {
     struct msi_info msi_info = {
         .seg = pdev->seg,
         .bus = pdev->bus,
         .devfn = pdev->devfn,
-        .entry_nr = vectors,
+        .table_base = table_base,
+        .entry_nr = nr,
     };
-    unsigned int i;
-    int rc;
-
-    ASSERT(msi->arch.pirq == INVALID_PIRQ);
+    unsigned int i, vectors = table_base ? 1 : nr;
+    int rc, pirq = INVALID_PIRQ;
 
     /* Get a PIRQ. */
-    rc = allocate_and_map_msi_pirq(pdev->domain, -1, &msi->arch.pirq,
-                                   MAP_PIRQ_TYPE_MULTI_MSI, &msi_info);
+    rc = allocate_and_map_msi_pirq(pdev->domain, -1, &pirq,
+                                   table_base ? MAP_PIRQ_TYPE_MSI
+                                              : MAP_PIRQ_TYPE_MULTI_MSI,
+                                   &msi_info);
     if ( rc )
     {
         gdprintk(XENLOG_ERR, "%04x:%02x:%02x.%u: failed to map PIRQ: %d\n",
@@ -685,15 +691,14 @@ int vpci_msi_arch_enable(struct vpci_msi *msi, const 
struct pci_dev *pdev,
 
     for ( i = 0; i < vectors; i++ )
     {
-        uint8_t vector = MASK_EXTR(msi->data, MSI_DATA_VECTOR_MASK);
-        uint8_t vector_mask = 0xff >> (8 - fls(msi->vectors) + 1);
+        uint8_t vector = MASK_EXTR(data, MSI_DATA_VECTOR_MASK);
+        uint8_t vector_mask = 0xff >> (8 - fls(vectors) + 1);
         struct xen_domctl_bind_pt_irq bind = {
-            .machine_irq = msi->arch.pirq + i,
+            .machine_irq = pirq + i,
             .irq_type = PT_IRQ_TYPE_MSI,
             .u.msi.gvec = (vector & ~vector_mask) |
                           ((vector + i) & vector_mask),
-            .u.msi.gflags = msi_gflags(msi->data, msi->address,
-                                       (msi->mask >> i) & 1),
+            .u.msi.gflags = msi_gflags(data, address, (mask >> i) & 1),
         };
 
         pcidevs_lock();
@@ -703,33 +708,49 @@ int vpci_msi_arch_enable(struct vpci_msi *msi, const 
struct pci_dev *pdev,
             gdprintk(XENLOG_ERR,
                      "%04x:%02x:%02x.%u: failed to bind PIRQ %u: %d\n",
                      pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
-                     PCI_FUNC(pdev->devfn), msi->arch.pirq + i, rc);
+                     PCI_FUNC(pdev->devfn), pirq + i, rc);
             while ( bind.machine_irq-- )
                 pt_irq_destroy_bind(pdev->domain, &bind);
             spin_lock(&pdev->domain->event_lock);
-            unmap_domain_pirq(pdev->domain, msi->arch.pirq);
+            unmap_domain_pirq(pdev->domain, pirq);
             spin_unlock(&pdev->domain->event_lock);
             pcidevs_unlock();
-            msi->arch.pirq = INVALID_PIRQ;
             return rc;
         }
         pcidevs_unlock();
     }
 
-    return 0;
+    return pirq;
 }
 
-void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev)
+int vpci_msi_arch_enable(struct vpci_msi *msi, const struct pci_dev *pdev,
+                         unsigned int vectors)
+{
+    int rc;
+
+    ASSERT(msi->arch.pirq == INVALID_PIRQ);
+    rc = vpci_msi_enable(pdev, msi->data, msi->address, vectors, 0, msi->mask);
+    if ( rc >= 0 )
+    {
+        msi->arch.pirq = rc;
+        rc = 0;
+    }
+
+    return rc;
+}
+
+static void vpci_msi_disable(const struct pci_dev *pdev, int pirq,
+                             unsigned int nr)
 {
     unsigned int i;
 
-    ASSERT(msi->arch.pirq != INVALID_PIRQ);
+    ASSERT(pirq != INVALID_PIRQ);
 
     pcidevs_lock();
-    for ( i = 0; i < msi->vectors; i++ )
+    for ( i = 0; i < nr; i++ )
     {
         struct xen_domctl_bind_pt_irq bind = {
-            .machine_irq = msi->arch.pirq + i,
+            .machine_irq = pirq + i,
             .irq_type = PT_IRQ_TYPE_MSI,
         };
         int rc;
@@ -739,10 +760,14 @@ void vpci_msi_arch_disable(struct vpci_msi *msi, const 
struct pci_dev *pdev)
     }
 
     spin_lock(&pdev->domain->event_lock);
-    unmap_domain_pirq(pdev->domain, msi->arch.pirq);
+    unmap_domain_pirq(pdev->domain, pirq);
     spin_unlock(&pdev->domain->event_lock);
     pcidevs_unlock();
+}
 
+void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev)
+{
+    vpci_msi_disable(pdev, msi->arch.pirq, msi->vectors);
     msi->arch.pirq = INVALID_PIRQ;
 }
 
@@ -763,3 +788,82 @@ void vpci_msi_arch_print(const struct vpci_msi *msi)
            MASK_EXTR(msi->address, MSI_ADDR_DEST_ID_MASK),
            msi->arch.pirq);
 }
+
+void vpci_msix_arch_mask_entry(struct vpci_msix_entry *entry,
+                               const struct pci_dev *pdev, bool mask)
+{
+    ASSERT(entry->arch.pirq != INVALID_PIRQ);
+    vpci_mask_pirq(pdev->domain, entry->arch.pirq, mask);
+}
+
+int vpci_msix_arch_enable_entry(struct vpci_msix_entry *entry,
+                                const struct pci_dev *pdev, paddr_t table_base)
+{
+    int rc;
+
+    ASSERT(entry->arch.pirq == INVALID_PIRQ);
+    rc = vpci_msi_enable(pdev, entry->data, entry->addr,
+                         vmsix_entry_nr(pdev->vpci->msix, entry),
+                         table_base, entry->masked);
+    if ( rc >= 0 )
+    {
+        entry->arch.pirq = rc;
+        rc = 0;
+    }
+
+    return rc;
+}
+
+int vpci_msix_arch_disable_entry(struct vpci_msix_entry *entry,
+                                 const struct pci_dev *pdev)
+{
+    if ( entry->arch.pirq == INVALID_PIRQ )
+        return -ENOENT;
+
+    vpci_msi_disable(pdev, entry->arch.pirq, 1);
+    entry->arch.pirq = INVALID_PIRQ;
+
+    return 0;
+}
+
+void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry)
+{
+    entry->arch.pirq = INVALID_PIRQ;
+}
+
+int vpci_msix_arch_print(const struct vpci_msix *msix)
+{
+    unsigned int i;
+
+    for ( i = 0; i < msix->max_entries; i++ )
+    {
+        const struct vpci_msix_entry *entry = &msix->entries[i];
+
+        printk("%6u vec=%02x%7s%6s%3sassert%5s%7s dest_id=%lu mask=%u pirq: 
%d\n",
+               i, MASK_EXTR(entry->data, MSI_DATA_VECTOR_MASK),
+               entry->data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
+               entry->data & MSI_DATA_TRIGGER_LEVEL ? "level" : "edge",
+               entry->data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
+               entry->addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
+               entry->addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "fixed",
+               MASK_EXTR(entry->addr, MSI_ADDR_DEST_ID_MASK),
+               entry->masked, entry->arch.pirq);
+        if ( i && !(i % 64) )
+        {
+            struct pci_dev *pdev = msix->pdev;
+
+            spin_unlock(&msix->pdev->vpci->lock);
+            process_pending_softirqs();
+            /* NB: we assume that pdev cannot go away for an alive domain. */
+            if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
+                return -EBUSY;
+            if ( pdev->vpci->msix != msix )
+            {
+                spin_unlock(&pdev->vpci->lock);
+                return -EAGAIN;
+            }
+        }
+    }
+
+    return 0;
+}
diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile
index 62cec9e82b..55d1bdfda0 100644
--- a/xen/drivers/vpci/Makefile
+++ b/xen/drivers/vpci/Makefile
@@ -1 +1 @@
-obj-y += vpci.o header.o msi.o
+obj-y += vpci.o header.o msi.o msix.o
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index 9fa07992cc..91a71ca66e 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -190,6 +190,7 @@ static int modify_bars(const struct pci_dev *pdev, bool 
map, bool rom_only)
     struct vpci_header *header = &pdev->vpci->header;
     struct rangeset *mem = rangeset_new(NULL, NULL, 0);
     struct pci_dev *tmp, *dev = NULL;
+    const struct vpci_msix *msix = pdev->vpci->msix;
     unsigned int i;
     int rc;
 
@@ -226,6 +227,24 @@ static int modify_bars(const struct pci_dev *pdev, bool 
map, bool rom_only)
         }
     }
 
+    /* Remove any MSIX regions if present. */
+    for ( i = 0; msix && i < ARRAY_SIZE(msix->tables); i++ )
+    {
+        unsigned long start = PFN_DOWN(vmsix_table_addr(pdev->vpci, i));
+        unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
+                                     vmsix_table_size(pdev->vpci, i) - 1);
+
+        rc = rangeset_remove_range(mem, start, end);
+        if ( rc )
+        {
+            printk(XENLOG_G_WARNING
+                   "Failed to remove MSIX table [%lx, %lx]: %d\n",
+                   start, end, rc);
+            rangeset_destroy(mem);
+            return rc;
+        }
+    }
+
     /*
      * Check for overlaps with other BARs. Note that only BARs that are
      * currently mapped (enabled) are checked for overlaps.
diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
index de4ddf562e..ad26c38a92 100644
--- a/xen/drivers/vpci/msi.c
+++ b/xen/drivers/vpci/msi.c
@@ -281,11 +281,12 @@ void vpci_dump_msi(void)
         if ( !has_vpci(d) )
             continue;
 
-        printk("vPCI MSI d%d\n", d->domain_id);
+        printk("vPCI MSI/MSI-X d%d\n", d->domain_id);
 
         list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
         {
             const struct vpci_msi *msi;
+            const struct vpci_msix *msix;
 
             if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
                 continue;
@@ -306,6 +307,30 @@ void vpci_dump_msi(void)
                 vpci_msi_arch_print(msi);
             }
 
+            msix = pdev->vpci->msix;
+            if ( msix && msix->enabled )
+            {
+                int rc;
+
+                printk("%04x:%02x:%02x.%u MSI-X\n", pdev->seg, pdev->bus,
+                       PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+                printk("  entries: %u maskall: %d enabled: %d\n",
+                       msix->max_entries, msix->masked, msix->enabled);
+
+                rc = vpci_msix_arch_print(msix);
+                if ( rc )
+                {
+                    /*
+                     * On error vpci_msix_arch_print will always return without
+                     * holding the lock.
+                     */
+                    printk("unable to print all MSI-X entries: %d\n", rc);
+                    process_pending_softirqs();
+                    continue;
+                }
+            }
+
             spin_unlock(&pdev->vpci->lock);
             process_pending_softirqs();
         }
diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
new file mode 100644
index 0000000000..3b378c2e51
--- /dev/null
+++ b/xen/drivers/vpci/msix.c
@@ -0,0 +1,458 @@
+/*
+ * Handlers for accesses to the MSI-X capability structure and the memory
+ * region.
+ *
+ * Copyright (C) 2017 Citrix Systems R&D
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/sched.h>
+#include <xen/vpci.h>
+
+#include <asm/msi.h>
+
+#define VMSIX_SIZE(num) offsetof(struct vpci_msix, entries[num])
+
+#define VMSIX_ADDR_IN_RANGE(addr, vpci, nr)                               \
+    ((addr) >= vmsix_table_addr(vpci, nr) &&                              \
+     (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
+
+static uint32_t control_read(const struct pci_dev *pdev, unsigned int reg,
+                             void *data)
+{
+    const struct vpci_msix *msix = data;
+
+    return (msix->max_entries - 1) |
+           (msix->enabled ? PCI_MSIX_FLAGS_ENABLE : 0) |
+           (msix->masked ? PCI_MSIX_FLAGS_MASKALL : 0);
+}
+
+static int update_entry(struct vpci_msix_entry *entry,
+                        const struct pci_dev *pdev, unsigned int nr)
+{
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    int rc = vpci_msix_arch_disable_entry(entry, pdev);
+
+    /* Ignore ENOENT, it means the entry wasn't setup. */
+    if ( rc && rc != -ENOENT )
+    {
+        gprintk(XENLOG_WARNING,
+                "%04x:%02x:%02x.%u: unable to disable entry %u for update: 
%d\n",
+                pdev->seg, pdev->bus, slot, func, nr, rc);
+        return rc;
+    }
+
+    rc = vpci_msix_arch_enable_entry(entry, pdev,
+                                     vmsix_table_base(pdev->vpci,
+                                                      VPCI_MSIX_TABLE));
+    if ( rc )
+    {
+        gprintk(XENLOG_WARNING,
+                "%04x:%02x:%02x.%u: unable to enable entry %u: %d\n",
+                pdev->seg, pdev->bus, slot, func, nr, rc);
+        /* Entry is likely not properly configured. */
+        return rc;
+    }
+
+    return 0;
+}
+
+static void control_write(const struct pci_dev *pdev, unsigned int reg,
+                          uint32_t val, void *data)
+{
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    struct vpci_msix *msix = data;
+    bool new_masked = val & PCI_MSIX_FLAGS_MASKALL;
+    bool new_enabled = val & PCI_MSIX_FLAGS_ENABLE;
+    unsigned int i;
+    int rc;
+
+    if ( new_masked == msix->masked && new_enabled == msix->enabled )
+        return;
+
+    /*
+     * According to the PCI 3.0 specification, switching the enable bit to 1
+     * or the function mask bit to 0 should cause all the cached addresses
+     * and data fields to be recalculated.
+     *
+     * In order to avoid the overhead of disabling and enabling all the
+     * entries every time the guest sets the maskall bit, Xen will only
+     * perform the disable and enable sequence when the guest has written to
+     * the entry.
+     */
+    if ( new_enabled && !new_masked && (!msix->enabled || msix->masked) )
+    {
+        for ( i = 0; i < msix->max_entries; i++ )
+        {
+            if ( msix->entries[i].masked || !msix->entries[i].updated ||
+                 update_entry(&msix->entries[i], pdev, i) )
+                continue;
+
+            msix->entries[i].updated = false;
+        }
+    }
+    else if ( !new_enabled && msix->enabled )
+    {
+        /* Guest has disabled MSIX, disable all entries. */
+        for ( i = 0; i < msix->max_entries; i++ )
+        {
+            /*
+             * NB: vpci_msix_arch_disable can be called for entries that are
+             * not setup, it will return -ENOENT in that case.
+             */
+            rc = vpci_msix_arch_disable_entry(&msix->entries[i], pdev);
+            switch ( rc )
+            {
+            case 0:
+                /*
+                 * Mark the entry successfully disabled as updated, so that on
+                 * the next enable the entry is properly setup. This is done
+                 * so that the following flow works correctly:
+                 *
+                 * mask entry -> disable MSIX -> enable MSIX -> unmask entry
+                 *
+                 * Without setting 'updated', the 'unmask entry' step will fail
+                 * because the entry has not been updated, so it would not be
+                 * mapped/bound at all.
+                 */
+                msix->entries[i].updated = true;
+                break;
+            case -ENOENT:
+                /* Ignore non-present entry. */
+                break;
+            default:
+                gprintk(XENLOG_WARNING,
+                        "%04x:%02x:%02x.%u: unable to disable entry %u: %d\n",
+                        pdev->seg, pdev->bus, slot, func, i, rc);
+                return;
+            }
+        }
+    }
+
+    msix->masked = new_masked;
+    msix->enabled = new_enabled;
+
+    val = control_read(pdev, reg, data);
+    if ( pci_msi_conf_write_intercept(msix->pdev, reg, 2, &val) >= 0 )
+        pci_conf_write16(pdev->seg, pdev->bus, slot, func, reg, val);
+}
+
+static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
+{
+    struct vpci_msix *msix;
+
+    list_for_each_entry ( msix, &d->arch.hvm_domain.msix_tables, next )
+    {
+        const struct vpci_bar *bars = msix->pdev->vpci->header.bars;
+        unsigned int i;
+
+        for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
+            if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
+                 VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
+                return msix;
+    }
+
+    return NULL;
+}
+
+static int msix_accept(struct vcpu *v, unsigned long addr)
+{
+    return !!msix_find(v->domain, addr);
+}
+
+static bool access_allowed(const struct pci_dev *pdev, unsigned long addr,
+                           unsigned int len)
+{
+    /* Only allow aligned 32/64b accesses. */
+    if ( (len == 4 || len == 8) && !(addr & (len - 1)) )
+        return true;
+
+    gprintk(XENLOG_WARNING,
+            "%04x:%02x:%02x.%u: unaligned or invalid size MSI-X table 
access\n",
+            pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), 
PCI_FUNC(pdev->devfn));
+
+    return false;
+}
+
+static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
+                                         paddr_t addr)
+{
+    paddr_t start = vmsix_table_addr(msix->pdev->vpci, VPCI_MSIX_TABLE);
+
+    return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
+}
+
+static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
+                     unsigned long *data)
+{
+    const struct domain *d = v->domain;
+    struct vpci_msix *msix = msix_find(d, addr);
+    const struct vpci_msix_entry *entry;
+    unsigned int offset;
+
+    *data = ~0ul;
+
+    if ( !msix )
+        return X86EMUL_RETRY;
+
+    if ( !access_allowed(msix->pdev, addr, len) )
+        return X86EMUL_OKAY;
+
+    if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
+    {
+        /*
+         * Access to PBA.
+         *
+         * TODO: note that this relies on having the PBA identity mapped to the
+         * guest address space. If this changes the address will need to be
+         * translated.
+         */
+        switch ( len )
+        {
+        case 4:
+            *data = readl(addr);
+            break;
+
+        case 8:
+            *data = readq(addr);
+            break;
+
+        default:
+            ASSERT_UNREACHABLE();
+            break;
+        }
+
+        return X86EMUL_OKAY;
+    }
+
+    spin_lock(&msix->pdev->vpci->lock);
+    entry = get_entry(msix, addr);
+    offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
+
+    switch ( offset )
+    {
+    case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
+        *data = entry->addr;
+        break;
+
+    case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
+        *data = entry->addr >> 32;
+        break;
+
+    case PCI_MSIX_ENTRY_DATA_OFFSET:
+        *data = entry->data;
+        if ( len == 8 )
+            *data |=
+                (uint64_t)(entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0) << 32;
+        break;
+
+    case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
+        *data = entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0;
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        break;
+    }
+    spin_unlock(&msix->pdev->vpci->lock);
+
+    return X86EMUL_OKAY;
+}
+
+static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
+                      unsigned long data)
+{
+    const struct domain *d = v->domain;
+    struct vpci_msix *msix = msix_find(d, addr);
+    struct vpci_msix_entry *entry;
+    unsigned int offset;
+
+    if ( !msix )
+        return X86EMUL_RETRY;
+
+    if ( !access_allowed(msix->pdev, addr, len) )
+        return X86EMUL_OKAY;
+
+    if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
+    {
+        /* Ignore writes to PBA for DomUs, it's behavior is undefined. */
+        if ( is_hardware_domain(d) )
+        {
+            switch ( len )
+            {
+            case 4:
+                writel(data, addr);
+                break;
+
+            case 8:
+                writeq(data, addr);
+                break;
+
+            default:
+                ASSERT_UNREACHABLE();
+                break;
+            }
+        }
+
+        return X86EMUL_OKAY;
+    }
+
+    spin_lock(&msix->pdev->vpci->lock);
+    entry = get_entry(msix, addr);
+    offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
+
+    /*
+     * NB: Xen allows writes to the data/address registers with the entry
+     * unmasked. The specification says this is undefined behavior, and Xen
+     * implements it as storing the written value, which will be made effective
+     * in the next mask/unmask cycle. This also mimics the implementation in
+     * QEMU.
+     */
+    switch ( offset )
+    {
+    case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
+        entry->updated = true;
+        if ( len == 8 )
+        {
+            entry->addr = data;
+            break;
+        }
+        entry->addr &= ~0xffffffff;
+        entry->addr |= data;
+        break;
+
+    case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
+        entry->updated = true;
+        entry->addr &= 0xffffffff;
+        entry->addr |= (uint64_t)data << 32;
+        break;
+
+    case PCI_MSIX_ENTRY_DATA_OFFSET:
+        entry->updated = true;
+        entry->data = data;
+
+        if ( len == 4 )
+            break;
+
+        data >>= 32;
+        /* fallthrough */
+    case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
+    {
+        bool new_masked = data & PCI_MSIX_VECTOR_BITMASK;
+        const struct pci_dev *pdev = msix->pdev;
+
+        if ( entry->masked == new_masked )
+            /* No change in the mask bit, nothing to do. */
+            break;
+
+        /*
+         * Update the masked state before calling vpci_msix_arch_enable_entry,
+         * so that it picks the new state.
+         */
+        entry->masked = new_masked;
+        if ( !new_masked && msix->enabled && !msix->masked && entry->updated )
+        {
+            /*
+             * If MSI-X is enabled, the function mask is not active, the entry
+             * is being unmasked and there have been changes to the address or
+             * data fields Xen needs to disable and enable the entry in order
+             * to pick up the changes.
+             */
+            if ( update_entry(entry, pdev, vmsix_entry_nr(msix, entry)) )
+                break;
+
+            entry->updated = false;
+        }
+        else
+            vpci_msix_arch_mask_entry(entry, pdev, entry->masked);
+
+        break;
+    }
+
+    default:
+        ASSERT_UNREACHABLE();
+        break;
+    }
+    spin_unlock(&msix->pdev->vpci->lock);
+
+    return X86EMUL_OKAY;
+}
+
+static const struct hvm_mmio_ops vpci_msix_table_ops = {
+    .check = msix_accept,
+    .read = msix_read,
+    .write = msix_write,
+};
+
+static int init_msix(struct pci_dev *pdev)
+{
+    struct domain *d = pdev->domain;
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    unsigned int msix_offset, i, max_entries;
+    uint16_t control;
+    int rc;
+
+    msix_offset = pci_find_cap_offset(pdev->seg, pdev->bus, slot, func,
+                                      PCI_CAP_ID_MSIX);
+    if ( !msix_offset )
+        return 0;
+
+    control = pci_conf_read16(pdev->seg, pdev->bus, slot, func,
+                              msix_control_reg(msix_offset));
+
+    max_entries = msix_table_size(control);
+
+    pdev->vpci->msix = xzalloc_bytes(VMSIX_SIZE(max_entries));
+    if ( !pdev->vpci->msix )
+        return -ENOMEM;
+
+    pdev->vpci->msix->max_entries = max_entries;
+    pdev->vpci->msix->pdev = pdev;
+
+    pdev->vpci->msix->tables[VPCI_MSIX_TABLE] =
+        pci_conf_read32(pdev->seg, pdev->bus, slot, func,
+                        msix_table_offset_reg(msix_offset));
+    pdev->vpci->msix->tables[VPCI_MSIX_PBA] =
+        pci_conf_read32(pdev->seg, pdev->bus, slot, func,
+                        msix_pba_offset_reg(msix_offset));
+
+    for ( i = 0; i < pdev->vpci->msix->max_entries; i++)
+    {
+        pdev->vpci->msix->entries[i].masked = true;
+        vpci_msix_arch_init_entry(&pdev->vpci->msix->entries[i]);
+    }
+
+    rc = vpci_add_register(pdev->vpci, control_read, control_write,
+                           msix_control_reg(msix_offset), 2, pdev->vpci->msix);
+    if ( rc )
+        return rc;
+
+    if ( list_empty(&d->arch.hvm_domain.msix_tables) )
+        register_mmio_handler(d, &vpci_msix_table_ops);
+
+    list_add(&pdev->vpci->msix->next, &d->arch.hvm_domain.msix_tables);
+
+    return 0;
+}
+REGISTER_VPCI_INIT(init_msix, VPCI_PRIORITY_HIGH);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 3012b30013..8ec9c916ea 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -47,6 +47,7 @@ void vpci_remove_device(struct pci_dev *pdev)
         xfree(r);
     }
     spin_unlock(&pdev->vpci->lock);
+    xfree(pdev->vpci->msix);
     xfree(pdev->vpci->msi);
     xfree(pdev->vpci);
     pdev->vpci = NULL;
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 4c43502e00..e2530c6b81 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -186,6 +186,9 @@ struct hvm_domain {
     struct list_head mmcfg_regions;
     rwlock_t mmcfg_lock;
 
+    /* List of MSI-X tables. */
+    struct list_head msix_tables;
+
     /* List of permanently write-mapped pages. */
     struct {
         spinlock_t lock;
diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
index 0fedb3473c..e6b6ed0b92 100644
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -132,6 +132,11 @@ struct vpci_arch_msi {
     int pirq;
 };
 
+/* Arch-specific MSI-X entry data for vPCI. */
+struct vpci_arch_msix_entry {
+    int pirq;
+};
+
 enum stdvga_cache_state {
     STDVGA_CACHE_UNINITIALIZED,
     STDVGA_CACHE_ENABLED,
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 7266c17679..fc47163ba6 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -115,6 +115,34 @@ struct vpci {
         struct vpci_arch_msi arch;
 #endif
     } *msi;
+
+    /* MSI-X data. */
+    struct vpci_msix {
+#ifdef __XEN__
+        struct pci_dev *pdev;
+        /* List link. */
+        struct list_head next;
+        /* Table information. */
+#define VPCI_MSIX_TABLE     0
+#define VPCI_MSIX_PBA       1
+#define VPCI_MSIX_MEM_NUM   2
+        uint32_t tables[VPCI_MSIX_MEM_NUM];
+        /* Maximum number of vectors supported by the device. */
+        uint16_t max_entries : 12;
+        /* MSI-X enabled? */
+        bool enabled         : 1;
+        /* Masked? */
+        bool masked          : 1;
+        /* Entries. */
+        struct vpci_msix_entry {
+            uint64_t addr;
+            uint32_t data;
+            bool masked  : 1;
+            bool updated : 1;
+            struct vpci_arch_msix_entry arch;
+        } entries[];
+#endif
+    } *msix;
 };
 
 struct vpci_vcpu {
@@ -137,6 +165,51 @@ int __must_check vpci_msi_arch_enable(struct vpci_msi *msi,
 void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev);
 void vpci_msi_arch_init(struct vpci_msi *msi);
 void vpci_msi_arch_print(const struct vpci_msi *msi);
+
+/* Arch-specific vPCI MSI-X helpers. */
+void vpci_msix_arch_mask_entry(struct vpci_msix_entry *entry,
+                               const struct pci_dev *pdev, bool mask);
+int __must_check vpci_msix_arch_enable_entry(struct vpci_msix_entry *entry,
+                                             const struct pci_dev *pdev,
+                                             paddr_t table_base);
+int __must_check vpci_msix_arch_disable_entry(struct vpci_msix_entry *entry,
+                                              const struct pci_dev *pdev);
+void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry);
+int vpci_msix_arch_print(const struct vpci_msix *msix);
+
+/*
+ * Helper functions to fetch MSIX related data. They are used by both the
+ * emulated MSIX code and the BAR handlers.
+ */
+static inline paddr_t vmsix_table_base(const struct vpci *vpci, unsigned int 
nr)
+{
+    return vpci->header.bars[vpci->msix->tables[nr] & PCI_MSIX_BIRMASK].addr;
+}
+
+static inline paddr_t vmsix_table_addr(const struct vpci *vpci, unsigned int 
nr)
+{
+    return vmsix_table_base(vpci, nr) +
+           (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK);
+}
+
+/*
+ * Note regarding the size calculation of the PBA: the spec mentions "The last
+ * QWORD will not necessarily be fully populated", so it implies that the PBA
+ * size is 64-bit aligned.
+ */
+static inline size_t vmsix_table_size(const struct vpci *vpci, unsigned int nr)
+{
+    return
+        (nr == VPCI_MSIX_TABLE) ? vpci->msix->max_entries * PCI_MSIX_ENTRY_SIZE
+                                : ROUNDUP(DIV_ROUND_UP(vpci->msix->max_entries,
+                                                       8), 8);
+}
+
+static inline unsigned int vmsix_entry_nr(const struct vpci_msix *msix,
+                                          const struct vpci_msix_entry *entry)
+{
+    return entry - msix->entries;
+}
 #endif /* __XEN__ */
 
 #else /* !CONFIG_HAS_VPCI */
--
generated by git-patchbot for /home/xen/git/xen.git#master

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.