[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v3 9/9] vpci/msix: add MSI-X handlers



Add handlers for accesses to the MSI-X message control field on the PCI
configuration space, and traps for accesses to the memory region that contains
the MSI-X table. This traps detect attempts from the guest to configure MSI-X
interrupts and properly sets them up.

Note that accesses to the Table Offset, Table BIR, PBA Offset, PBA BIR and the
PBA memory region itself are not trapped by Xen at the moment.

Whether Xen is going to provide this functionality to Dom0 (MSI-X emulation) is
controlled by the "msix" option in the dom0 field. When disabling this option
Xen will hide the MSI-X capability structure from Dom0.

Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
---
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
Changes since v2:
 - Split out arch-specific code.

This patch has been tested with devices using both a single MSI-X entry and
multiple ones.
---
 docs/misc/xen-command-line.markdown |   7 +
 xen/arch/x86/dom0_build.c           |   4 +
 xen/arch/x86/hvm/hvm.c              |   1 +
 xen/arch/x86/hvm/vmsi.c             | 114 ++++++++-
 xen/drivers/vpci/Makefile           |   2 +-
 xen/drivers/vpci/capabilities.c     |  16 +-
 xen/drivers/vpci/header.c           |  41 ++-
 xen/drivers/vpci/msix.c             | 487 ++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/hvm/domain.h    |   3 +
 xen/include/asm-x86/hvm/io.h        |  18 ++
 xen/include/xen/vpci.h              |  27 ++
 11 files changed, 696 insertions(+), 24 deletions(-)
 create mode 100644 xen/drivers/vpci/msix.c

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 73013aea14..7c1684511c 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -684,6 +684,13 @@ enabled.
 Enable or disable (using the `no-` prefix) the MSI emulation inside of
 Xen for a PVH Dom0. Note that this option has no effect on a PV Dom0.
 
+> `msix`
+
+> Default: `true`
+
+Enable or disable (using the `no-` prefix) the MSI-X emulation inside of
+Xen for a PVH Dom0. Note that this option has no effect on a PV Dom0.
+
 ### dtuart (ARM)
 > `= path [:options]`
 
diff --git a/xen/arch/x86/dom0_build.c b/xen/arch/x86/dom0_build.c
index 01afcf6215..3996d9dd12 100644
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -177,6 +177,7 @@ bool __initdata opt_dom0_shadow;
 #endif
 bool __initdata dom0_pvh;
 bool __initdata dom0_msi = true;
+bool __initdata dom0_msix = true;
 
 /*
  * List of parameters that affect Dom0 creation:
@@ -184,6 +185,7 @@ bool __initdata dom0_msi = true;
  *  - pvh               Create a PVHv2 Dom0.
  *  - shadow            Use shadow paging for Dom0.
  *  - msi               MSI functionality.
+ *  - msix              MSI-X functionality.
  */
 static void __init parse_dom0_param(char *s)
 {
@@ -207,6 +209,8 @@ static void __init parse_dom0_param(char *s)
 #endif
         else if ( !strcmp(s, "msi") )
             dom0_msi = enabled;
+        else if ( !strcmp(s, "msix") )
+            dom0_msix = enabled;
 
         s = ss + 1;
     } while ( ss );
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index ef3ad2a615..3a3296ffe7 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -614,6 +614,7 @@ int hvm_domain_initialise(struct domain *d)
     INIT_LIST_HEAD(&d->arch.hvm_domain.write_map.list);
     INIT_LIST_HEAD(&d->arch.hvm_domain.g2m_ioport_list);
     INIT_LIST_HEAD(&d->arch.hvm_domain.ecam_regions);
+    INIT_LIST_HEAD(&d->arch.hvm_domain.msix_tables);
 
     hvm_init_cacheattr_region_list(d);
 
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index f23b2f43d6..00a15b862f 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -643,15 +643,15 @@ static unsigned int msi_flags(uint16_t data, uint64_t 
addr)
            (trig_mode << GFLAGS_SHIFT_TRG_MODE);
 }
 
-void vpci_msi_mask(struct vpci_arch_msi *arch, unsigned int entry, bool mask)
+static void vpci_mask_pirq(int pirq, bool mask)
 {
     struct pirq *pinfo;
     struct irq_desc *desc;
     unsigned long flags;
     int irq;
 
-    ASSERT(arch->pirq != -1);
-    pinfo = pirq_info(current->domain, arch->pirq + entry);
+    ASSERT(pirq != -1);
+    pinfo = pirq_info(current->domain, pirq);
     ASSERT(pinfo);
 
     irq = pinfo->arch.irq;
@@ -665,6 +665,11 @@ void vpci_msi_mask(struct vpci_arch_msi *arch, unsigned 
int entry, bool mask)
     spin_unlock_irqrestore(&desc->lock, flags);
 }
 
+void vpci_msi_mask(struct vpci_arch_msi *arch, unsigned int entry, bool mask)
+{
+    vpci_mask_pirq(arch->pirq + entry, mask);
+}
+
 int vpci_msi_enable(struct vpci_arch_msi *arch, struct pci_dev *pdev,
                     uint64_t address, uint32_t data, unsigned int vectors)
 {
@@ -763,3 +768,106 @@ void vpci_msi_arch_print(struct vpci_arch_msi *arch)
     printk("PIRQ: %d\n", arch->pirq);
 }
 
+void vpci_msix_mask(struct vpci_arch_msix_entry *arch, bool mask)
+{
+    vpci_mask_pirq(arch->pirq, mask);
+}
+
+int vpci_msix_enable(struct vpci_arch_msix_entry *arch, struct pci_dev *pdev,
+                     uint64_t address, uint32_t data, unsigned int entry_nr,
+                     paddr_t table_base)
+{
+    struct domain *d = pdev->domain;
+    xen_domctl_bind_pt_irq_t bind = {
+        .hvm_domid = DOMID_SELF,
+        .irq_type = PT_IRQ_TYPE_MSI,
+        .u.msi.gvec = msi_vector(data),
+        .u.msi.gflags = msi_flags(data, address),
+    };
+    int rc;
+
+    if ( arch->pirq == -1 )
+    {
+        struct msi_info msi_info = {
+            .seg = pdev->seg,
+            .bus = pdev->bus,
+            .devfn = pdev->devfn,
+            .table_base = table_base,
+            .entry_nr = entry_nr,
+        };
+        int index = -1;
+
+        /* Map PIRQ. */
+        rc = allocate_and_map_msi_pirq(pdev->domain, &index, &arch->pirq,
+                                       &msi_info);
+        if ( rc )
+        {
+            gdprintk(XENLOG_ERR,
+                     "%04x:%02x:%02x.%u: unable to map MSI-X PIRQ entry %u: 
%d\n",
+                     pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                     PCI_FUNC(pdev->devfn), entry_nr, rc);
+            return rc;
+        }
+    }
+
+    bind.machine_irq = arch->pirq;
+    pcidevs_lock();
+    rc = pt_irq_create_bind(d, &bind);
+    if ( rc )
+    {
+        gdprintk(XENLOG_ERR,
+                 "%04x:%02x:%02x.%u: unable to create MSI-X bind %u: %d\n",
+                 pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                 PCI_FUNC(pdev->devfn), entry_nr, rc);
+        spin_lock(&pdev->domain->event_lock);
+        unmap_domain_pirq(pdev->domain, arch->pirq);
+        spin_unlock(&pdev->domain->event_lock);
+        arch->pirq = -1;
+    }
+    pcidevs_unlock();
+
+    return 0;
+}
+
+int vpci_msix_disable(struct vpci_arch_msix_entry *arch)
+{
+    xen_domctl_bind_pt_irq_t bind = {
+        .hvm_domid = DOMID_SELF,
+        .irq_type = PT_IRQ_TYPE_MSI,
+        .machine_irq = arch->pirq,
+    };
+    int rc;
+
+    if ( arch->pirq == -1 )
+        return 0;
+
+    pcidevs_lock();
+    rc = pt_irq_destroy_bind(current->domain, &bind);
+    if ( rc )
+    {
+        pcidevs_unlock();
+        return rc;
+    }
+
+    spin_lock(&current->domain->event_lock);
+    unmap_domain_pirq(current->domain, arch->pirq);
+    spin_unlock(&current->domain->event_lock);
+    pcidevs_unlock();
+
+    arch->pirq = -1;
+
+    return 0;
+}
+
+int vpci_msix_arch_init(struct vpci_arch_msix_entry *arch)
+{
+    arch->pirq = -1;
+    return 0;
+}
+
+void vpci_msix_arch_print(struct vpci_arch_msix_entry *arch)
+{
+    /* No newline, it will be added by the generic debug handler. */
+    printk("pirq: %d", arch->pirq);
+}
+
diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile
index ef4fc6caf3..55398d4428 100644
--- a/xen/drivers/vpci/Makefile
+++ b/xen/drivers/vpci/Makefile
@@ -1 +1 @@
-obj-y += vpci.o header.o capabilities.o msi.o
+obj-y += vpci.o header.o capabilities.o msi.o msix.o
diff --git a/xen/drivers/vpci/capabilities.c b/xen/drivers/vpci/capabilities.c
index ad9f45c2e1..7166ccb502 100644
--- a/xen/drivers/vpci/capabilities.c
+++ b/xen/drivers/vpci/capabilities.c
@@ -130,21 +130,7 @@ void xen_vpci_mask_capability(struct pci_dev *pdev, 
uint8_t cap_id)
     }
 }
 
-static int vpci_capabilities_init(struct pci_dev *pdev)
-{
-    int rc;
-
-    rc = vpci_index_capabilities(pdev);
-    if ( rc )
-        return rc;
-
-    /* Mask MSI-X capability until Xen handles it. */
-    xen_vpci_mask_capability(pdev, PCI_CAP_ID_MSIX);
-
-    return 0;
-}
-
-REGISTER_VPCI_INIT(vpci_capabilities_init, true);
+REGISTER_VPCI_INIT(vpci_index_capabilities, true);
 
 /*
  * Local variables:
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index 3deec53efd..c03dcdd708 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -32,16 +32,47 @@ static int vpci_modify_bars(struct pci_dev *pdev, const 
bool map)
         paddr_t gaddr = map ? header->bars[i].gaddr
                             : header->bars[i].mapped_addr;
         paddr_t paddr = header->bars[i].paddr;
+        size_t size = header->bars[i].size;
 
         if ( header->bars[i].type != VPCI_BAR_MEM &&
              header->bars[i].type != VPCI_BAR_MEM64_LO )
             continue;
 
-        rc = modify_mmio(pdev->domain, _gfn(PFN_DOWN(gaddr)),
-                         _mfn(PFN_DOWN(paddr)), PFN_UP(header->bars[i].size),
-                         map);
-        if ( rc )
-            break;
+        if ( pdev->vpci->msix != NULL && pdev->vpci->msix->bir == i )
+        {
+            /* There's an MSI-X table inside of this BAR. */
+            paddr_t msix_gaddr = gaddr + pdev->vpci->msix->offset;
+            paddr_t msix_paddr = paddr + pdev->vpci->msix->offset;
+            size_t msix_size = pdev->vpci->msix->max_entries *
+                               PCI_MSIX_ENTRY_SIZE;
+
+            ASSERT(IS_ALIGNED(msix_gaddr, PAGE_SIZE) &&
+                   IS_ALIGNED(msix_paddr, PAGE_SIZE));
+
+            rc = modify_mmio(pdev->domain, _gfn(PFN_DOWN(gaddr)),
+                             _mfn(PFN_DOWN(paddr)),
+                             PFN_DOWN(msix_paddr - paddr), map);
+            if ( rc )
+                break;
+
+            rc = modify_mmio(pdev->domain,
+                             _gfn(PFN_UP(msix_gaddr + msix_size)),
+                             _mfn(PFN_UP(msix_paddr + msix_size)),
+                             PFN_UP(paddr + size -
+                                    round_pgup(msix_paddr + msix_size)), map);
+            if ( rc )
+                break;
+
+            if ( map )
+                pdev->vpci->msix->addr = msix_gaddr;
+        }
+        else
+        {
+            rc = modify_mmio(pdev->domain, _gfn(PFN_DOWN(gaddr)),
+                             _mfn(PFN_DOWN(paddr)), PFN_UP(size), map);
+            if ( rc )
+                break;
+        }
 
         header->bars[i].mapped_addr = map ? gaddr : 0;
     }
diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
new file mode 100644
index 0000000000..df45dca917
--- /dev/null
+++ b/xen/drivers/vpci/msix.c
@@ -0,0 +1,487 @@
+/*
+ * Handlers for accesses to the MSI-X capability structure and the memory
+ * region.
+ *
+ * Copyright (C) 2017 Citrix Systems R&D
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/sched.h>
+#include <xen/vpci.h>
+#include <asm/msi.h>
+#include <xen/p2m-common.h>
+#include <xen/keyhandler.h>
+
+#define MSIX_SIZE(num) (offsetof(struct vpci_msix, entries[num]))
+
+static int vpci_msix_control_read(struct pci_dev *pdev, unsigned int reg,
+                                  union vpci_val *val, void *data)
+{
+    struct vpci_msix *msix = data;
+
+    val->word = (msix->max_entries - 1) & PCI_MSIX_FLAGS_QSIZE;
+    val->word |= msix->enabled ? PCI_MSIX_FLAGS_ENABLE : 0;
+    val->word |= msix->masked ? PCI_MSIX_FLAGS_MASKALL : 0;
+
+    return 0;
+}
+
+static int vpci_msix_control_write(struct pci_dev *pdev, unsigned int reg,
+                                   union vpci_val val, void *data)
+{
+    uint8_t seg = pdev->seg, bus = pdev->bus;
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    paddr_t table_base = pdev->vpci->header.bars[pdev->vpci->msix->bir].paddr;
+    struct vpci_msix *msix = data;
+    bool new_masked, new_enabled;
+    unsigned int i;
+    uint32_t data32;
+    int rc;
+
+    new_masked = val.word & PCI_MSIX_FLAGS_MASKALL;
+    new_enabled = val.word & PCI_MSIX_FLAGS_ENABLE;
+
+    if ( new_enabled != msix->enabled && new_enabled )
+    {
+        /* MSI-X enabled. */
+        for ( i = 0; i < msix->max_entries; i++ )
+        {
+            if ( msix->entries[i].masked )
+                continue;
+
+            rc = vpci_msix_enable(&msix->entries[i].arch, pdev,
+                                  msix->entries[i].addr, msix->entries[i].data,
+                                  msix->entries[i].nr, table_base);
+            if ( rc )
+            {
+                gdprintk(XENLOG_ERR,
+                         "%04x:%02x:%02x.%u: unable to update entry %u: %d\n",
+                         seg, bus, slot, func, i, rc);
+                return rc;
+            }
+
+            vpci_msix_mask(&msix->entries[i].arch, false);
+        }
+    }
+    else if ( new_enabled != msix->enabled && !new_enabled )
+    {
+        /* MSI-X disabled. */
+        for ( i = 0; i < msix->max_entries; i++ )
+        {
+            rc = vpci_msix_disable(&msix->entries[i].arch);
+            if ( rc )
+            {
+                gdprintk(XENLOG_ERR,
+                         "%04x:%02x:%02x.%u: unable to disable entry %u: %d\n",
+                         seg, bus, slot, func, i, rc);
+                return rc;
+            }
+        }
+    }
+
+    data32 = val.word;
+    if ( (new_enabled != msix->enabled || new_masked != msix->masked) &&
+         pci_msi_conf_write_intercept(pdev, reg, 2, &data32) >= 0 )
+        pci_conf_write16(seg, bus, slot, func, reg, data32);
+
+    msix->masked = new_masked;
+    msix->enabled = new_enabled;
+
+    return 0;
+}
+
+static struct vpci_msix *vpci_msix_find(struct domain *d, unsigned long addr)
+{
+    struct vpci_msix *msix;
+
+    ASSERT(vpci_locked(d));
+    list_for_each_entry ( msix,  &d->arch.hvm_domain.msix_tables, next )
+        if ( msix->pdev->vpci->header.command & PCI_COMMAND_MEMORY &&
+             addr >= msix->addr &&
+             addr < msix->addr + msix->max_entries * PCI_MSIX_ENTRY_SIZE )
+            return msix;
+
+    return NULL;
+}
+
+static int vpci_msix_table_accept(struct vcpu *v, unsigned long addr)
+{
+    int found;
+
+    vpci_lock(v->domain);
+    found = !!vpci_msix_find(v->domain, addr);
+    vpci_unlock(v->domain);
+
+    return found;
+}
+
+static int vpci_msix_access_check(struct pci_dev *pdev, unsigned long addr,
+                                  unsigned int len)
+{
+    uint8_t seg = pdev->seg, bus = pdev->bus;
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+
+
+    /* Only allow 32/64b accesses. */
+    if ( len != 4 && len != 8 )
+    {
+        gdprintk(XENLOG_ERR,
+                 "%04x:%02x:%02x.%u: invalid MSI-X table access size: %u\n",
+                 seg, bus, slot, func, len);
+        return -EINVAL;
+    }
+
+    /* Do no allow accesses that span across multiple entries. */
+    if ( (addr & (PCI_MSIX_ENTRY_SIZE - 1)) + len > PCI_MSIX_ENTRY_SIZE )
+    {
+        gdprintk(XENLOG_ERR,
+                 "%04x:%02x:%02x.%u: MSI-X access crosses entry boundary\n",
+                 seg, bus, slot, func);
+        return -EINVAL;
+    }
+
+    /*
+     * Only allow 64b accesses to the low message address field.
+     *
+     * NB: this is more restrictive than the specification, that allows 64b
+     * accesses to other fields under certain circumstances, so this check and
+     * the code will have to be fixed in order to fully comply with the
+     * specification.
+     */
+    if ( (addr & (PCI_MSIX_ENTRY_SIZE - 1)) != 0 && len != 4 )
+    {
+        gdprintk(XENLOG_ERR,
+                 "%04x:%02x:%02x.%u: 64bit MSI-X table access to 32bit field"
+                 " (offset: %#lx len: %u)\n", seg, bus, slot, func,
+                 addr & (PCI_MSIX_ENTRY_SIZE - 1), len);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+static struct vpci_msix_entry *vpci_msix_get_entry(struct vpci_msix *msix,
+                                                   unsigned long addr)
+{
+    return &msix->entries[(addr - msix->addr) / PCI_MSIX_ENTRY_SIZE];
+}
+
+static int vpci_msix_table_read(struct vcpu *v, unsigned long addr,
+                                unsigned int len, unsigned long *data)
+{
+    struct vpci_msix *msix;
+    struct vpci_msix_entry *entry;
+    unsigned int offset;
+
+    vpci_lock(v->domain);
+    msix = vpci_msix_find(v->domain, addr);
+    if ( !msix )
+    {
+        vpci_unlock(v->domain);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    if ( vpci_msix_access_check(msix->pdev, addr, len) )
+    {
+        vpci_unlock(v->domain);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    /* Get the table entry and offset. */
+    entry = vpci_msix_get_entry(msix, addr);
+    offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
+
+    switch ( offset )
+    {
+    case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
+        *data = entry->addr;
+        break;
+    case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
+        *data = entry->addr >> 32;
+        break;
+    case PCI_MSIX_ENTRY_DATA_OFFSET:
+        *data = entry->data;
+        break;
+    case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
+        *data = entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0;
+        break;
+    default:
+        BUG();
+    }
+    vpci_unlock(v->domain);
+
+    return X86EMUL_OKAY;
+}
+
+static int vpci_msix_table_write(struct vcpu *v, unsigned long addr,
+                                 unsigned int len, unsigned long data)
+{
+    struct vpci_msix *msix;
+    struct vpci_msix_entry *entry;
+    unsigned int offset;
+
+    vpci_lock(v->domain);
+    msix = vpci_msix_find(v->domain, addr);
+    if ( !msix )
+    {
+        vpci_unlock(v->domain);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    if ( vpci_msix_access_check(msix->pdev, addr, len) )
+    {
+        vpci_unlock(v->domain);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    /* Get the table entry and offset. */
+    entry = vpci_msix_get_entry(msix, addr);
+    offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
+
+    switch ( offset )
+    {
+    case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
+        if ( len == 8 )
+        {
+            entry->addr = data;
+            break;
+        }
+        entry->addr &= ~GENMASK(31, 0);
+        entry->addr |= data;
+        break;
+    case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
+        entry->addr &= ~GENMASK(63, 32);
+        entry->addr |= data << 32;
+        break;
+    case PCI_MSIX_ENTRY_DATA_OFFSET:
+        entry->data = data;
+        break;
+    case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
+    {
+        bool new_masked = data & PCI_MSIX_VECTOR_BITMASK;
+        struct pci_dev *pdev = msix->pdev;
+        paddr_t table_base =
+            pdev->vpci->header.bars[pdev->vpci->msix->bir].paddr;
+        int rc;
+
+        if ( !msix->enabled )
+        {
+            entry->masked = new_masked;
+            break;
+        }
+
+        if ( new_masked != entry->masked && !new_masked )
+        {
+            /* Unmasking an entry, update it. */
+            rc = vpci_msix_enable(&entry->arch, msix->pdev, entry->addr,
+                                  entry->data, entry->nr, table_base);
+            if ( rc )
+            {
+                vpci_unlock(v->domain);
+                gdprintk(XENLOG_ERR,
+                         "%04x:%02x:%02x.%u: unable to update entry %u: %d\n",
+                         pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                         PCI_FUNC(pdev->devfn), entry->nr, rc);
+                return X86EMUL_UNHANDLEABLE;
+            }
+        }
+
+        vpci_msix_mask(&entry->arch, new_masked);
+        entry->masked = new_masked;
+
+        break;
+    }
+    default:
+        BUG();
+    }
+    vpci_unlock(v->domain);
+
+    return X86EMUL_OKAY;
+}
+
+static const struct hvm_mmio_ops vpci_msix_table_ops = {
+    .check = vpci_msix_table_accept,
+    .read = vpci_msix_table_read,
+    .write = vpci_msix_table_write,
+};
+
+static int vpci_init_msix(struct pci_dev *pdev)
+{
+    struct domain *d = pdev->domain;
+    uint8_t seg = pdev->seg, bus = pdev->bus;
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    struct vpci_msix *msix;
+    unsigned int msix_offset, i, max_entries;
+    paddr_t msix_paddr;
+    uint16_t control;
+    int rc;
+
+    msix_offset = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
+    if ( !msix_offset )
+        return 0;
+
+    if ( !vpci_msix_enabled(pdev->domain) )
+    {
+        xen_vpci_mask_capability(pdev, PCI_CAP_ID_MSIX);
+        return 0;
+    }
+
+    control = pci_conf_read16(seg, bus, slot, func,
+                              msix_control_reg(msix_offset));
+
+    /* Get the maximum number of vectors the device supports. */
+    max_entries = msix_table_size(control);
+    if ( !max_entries )
+        return 0;
+
+    msix = xzalloc_bytes(MSIX_SIZE(max_entries));
+    if ( !msix )
+        return -ENOMEM;
+
+    msix->max_entries = max_entries;
+    msix->pdev = pdev;
+
+    /* Find the MSI-X table address. */
+    msix->offset = pci_conf_read32(seg, bus, slot, func,
+                                   msix_table_offset_reg(msix_offset));
+    msix->bir = msix->offset & PCI_MSIX_BIRMASK;
+    msix->offset &= ~PCI_MSIX_BIRMASK;
+
+    ASSERT(pdev->vpci->header.bars[msix->bir].type == VPCI_BAR_MEM ||
+           pdev->vpci->header.bars[msix->bir].type == VPCI_BAR_MEM64_LO);
+    msix->addr = pdev->vpci->header.bars[msix->bir].mapped_addr + msix->offset;
+    msix_paddr = pdev->vpci->header.bars[msix->bir].paddr + msix->offset;
+
+    for ( i = 0; i < msix->max_entries; i++)
+    {
+        msix->entries[i].masked = true;
+        msix->entries[i].nr = i;
+        vpci_msix_arch_init(&msix->entries[i].arch);
+    }
+
+    if ( list_empty(&d->arch.hvm_domain.msix_tables) )
+        register_mmio_handler(d, &vpci_msix_table_ops);
+
+    list_add(&msix->next, &d->arch.hvm_domain.msix_tables);
+
+    rc = xen_vpci_add_register(pdev, vpci_msix_control_read,
+                               vpci_msix_control_write,
+                               msix_control_reg(msix_offset), 2, msix);
+    if ( rc )
+    {
+        dprintk(XENLOG_ERR,
+                "%04x:%02x:%02x.%u: failed to add handler for MSI-X control: 
%d\n",
+                seg, bus, slot, func, rc);
+        goto error;
+    }
+
+    if ( pdev->vpci->header.command & PCI_COMMAND_MEMORY )
+    {
+        /* Unmap this memory from the guest. */
+        rc = modify_mmio(pdev->domain, _gfn(PFN_DOWN(msix->addr)),
+                         _mfn(PFN_DOWN(msix_paddr)),
+                         PFN_UP(msix->max_entries * PCI_MSIX_ENTRY_SIZE),
+                         false);
+        if ( rc )
+        {
+            dprintk(XENLOG_ERR,
+                    "%04x:%02x:%02x.%u: unable to unmap MSI-X BAR region: 
%d\n",
+                    seg, bus, slot, func, rc);
+            goto error;
+        }
+    }
+
+    pdev->vpci->msix = msix;
+
+    return 0;
+
+ error:
+    ASSERT(rc);
+    xfree(msix);
+    return rc;
+}
+
+REGISTER_VPCI_INIT(vpci_init_msix, false);
+
+static void vpci_dump_msix(unsigned char key)
+{
+    struct domain *d;
+    struct pci_dev *pdev;
+
+    printk("Guest MSI-X information:\n");
+
+    for_each_domain ( d )
+    {
+        if ( !has_vpci(d) )
+            continue;
+
+        vpci_lock(d);
+        list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list)
+        {
+            uint8_t seg = pdev->seg, bus = pdev->bus;
+            uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+            struct vpci_msix *msix = pdev->vpci->msix;
+            unsigned int i;
+
+            if ( !msix )
+                continue;
+
+            printk("Device %04x:%02x:%02x.%u\n", seg, bus, slot, func);
+
+            printk("Max entries: %u maskall: %u enabled: %u\n",
+                   msix->max_entries, msix->masked, msix->enabled);
+
+            printk("Guest entries:\n");
+            for ( i = 0; i < msix->max_entries; i++ )
+            {
+                struct vpci_msix_entry *entry = &msix->entries[i];
+                uint32_t data = entry->data;
+                uint64_t addr = entry->addr;
+
+                printk("%4u vec=%#02x%7s%6s%3sassert%5s%7s dest_id=%lu mask=%u 
",
+                       i,
+                       (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT,
+                       data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
+                       data & MSI_DATA_TRIGGER_LEVEL ? "level" : "edge",
+                       data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
+                       addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
+                       addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "cpu",
+                       (addr & MSI_ADDR_DEST_ID_MASK) >> 
MSI_ADDR_DEST_ID_SHIFT,
+                       entry->masked);
+                vpci_msix_arch_print(&entry->arch);
+                printk("\n");
+            }
+            printk("\n");
+        }
+        vpci_unlock(d);
+    }
+}
+
+static int __init vpci_msix_setup_keyhandler(void)
+{
+    register_keyhandler('X', vpci_dump_msix, "dump guest MSI-X state", 1);
+    return 0;
+}
+__initcall(vpci_msix_setup_keyhandler);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index b69c33df3c..2bef170cc0 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -198,6 +198,9 @@ struct hvm_domain {
     /* List of ECAM (MMCFG) regions trapped by Xen. */
     struct list_head ecam_regions;
 
+    /* List of MSI-X tables. */
+    struct list_head msix_tables;
+
     /* List of permanently write-mapped pages. */
     struct {
         spinlock_t lock;
diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
index ae3af43749..d7ecc2adbf 100644
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -131,6 +131,10 @@ void msixtbl_init(struct domain *d);
 extern bool dom0_msi;
 #define vpci_msi_enabled(d) (!is_hardware_domain((d)) || dom0_msi)
 
+/* Is emulated MSI enabled? */
+extern bool dom0_msix;
+#define vpci_msix_enabled(d) (!is_hardware_domain((d)) || dom0_msix)
+
 /* Arch-specific MSI data for vPCI. */
 struct vpci_arch_msi {
     int pirq;
@@ -145,6 +149,20 @@ int vpci_msi_disable(struct vpci_arch_msi *arch, struct 
pci_dev *pdev,
 int vpci_msi_arch_init(struct vpci_arch_msi *arch);
 void vpci_msi_arch_print(struct vpci_arch_msi *arch);
 
+/* Arch-specific MSI-X entry data for vPCI. */
+struct vpci_arch_msix_entry {
+    int pirq;
+};
+
+/* Arch-specific vPCI MSI-X helpers. */
+void vpci_msix_mask(struct vpci_arch_msix_entry *arch, bool mask);
+int vpci_msix_enable(struct vpci_arch_msix_entry *arch, struct pci_dev *pdev,
+                     uint64_t address, uint32_t data, unsigned int entry_nr,
+                     paddr_t table_base);
+int vpci_msix_disable(struct vpci_arch_msix_entry *arch);
+int vpci_msix_arch_init(struct vpci_arch_msix_entry *arch);
+void vpci_msix_arch_print(struct vpci_arch_msix_entry *arch);
+
 enum stdvga_cache_state {
     STDVGA_CACHE_UNINITIALIZED,
     STDVGA_CACHE_ENABLED,
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 373b8d6505..8c3e56c1e4 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -112,6 +112,33 @@ struct vpci {
         /* Arch-specific data. */
         struct vpci_arch_msi arch;
     } *msi;
+
+    /* MSI-X data. */
+    struct vpci_msix {
+        struct pci_dev *pdev;
+        /* Maximum number of vectors supported by the device. */
+        unsigned int max_entries;
+        /* MSI-X table offset. */
+        unsigned int offset;
+        /* MSI-X table BIR. */
+        unsigned int bir;
+        /* Table addr. */
+        paddr_t addr;
+        /* MSI-X enabled? */
+        bool enabled;
+        /* Masked? */
+        bool masked;
+        /* List link. */
+        struct list_head next;
+        /* Entries. */
+        struct vpci_msix_entry {
+                unsigned int nr;
+                uint64_t addr;
+                uint32_t data;
+                bool masked;
+                struct vpci_arch_msix_entry arch;
+          } entries[];
+    } *msix;
 #endif
 };
 
-- 
2.11.0 (Apple Git-81)


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.