[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] vpci: add header handlers



commit 9c244fdef7e7c214316dab1bbd91cd388781cb10
Author:     Roger Pau Monne <roger.pau@xxxxxxxxxx>
AuthorDate: Thu Mar 22 15:00:00 2018 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri Mar 23 10:22:07 2018 +0100

    vpci: add header handlers
    
    Introduce a set of handlers that trap accesses to the PCI BARs and the
    command register, in order to snoop BAR sizing and BAR relocation.
    
    The command handler is used to detect changes to bit 2 (response to
    memory space accesses), and maps/unmaps the BARs of the device into
    the guest p2m. A rangeset is used in order to figure out which memory
    to map/unmap. This makes it easier to keep track of the possible
    overlaps with other BARs, and will also simplify MSI-X support, where
    certain regions of a BAR might be used for the MSI-X table or PBA.
    
    The BAR register handlers are used to detect attempts by the guest to
    size or relocate the BARs.
    
    Note that the long running BAR mapping and unmapping operations are
    deferred to be performed by hvm_io_pending, so that they can be safely
    preempted.
    
    Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
    [IO]
    Reviewed-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
---
 tools/tests/vpci/emul.h   |   1 +
 xen/arch/x86/hvm/ioreq.c  |   4 +
 xen/drivers/vpci/Makefile |   2 +-
 xen/drivers/vpci/header.c | 545 ++++++++++++++++++++++++++++++++++++++++++++++
 xen/drivers/vpci/vpci.c   |  45 ++--
 xen/include/xen/sched.h   |   4 +
 xen/include/xen/vpci.h    |  61 ++++++
 7 files changed, 648 insertions(+), 14 deletions(-)

diff --git a/tools/tests/vpci/emul.h b/tools/tests/vpci/emul.h
index fd0317995a..5d47544bf7 100644
--- a/tools/tests/vpci/emul.h
+++ b/tools/tests/vpci/emul.h
@@ -80,6 +80,7 @@ typedef union {
     };
 } pci_sbdf_t;
 
+#define CONFIG_HAS_VPCI
 #include "vpci.h"
 
 #define __hwdom_init
diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
index 44d029499d..5b469f4b12 100644
--- a/xen/arch/x86/hvm/ioreq.c
+++ b/xen/arch/x86/hvm/ioreq.c
@@ -26,6 +26,7 @@
 #include <xen/domain.h>
 #include <xen/event.h>
 #include <xen/paging.h>
+#include <xen/vpci.h>
 
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/ioreq.h>
@@ -48,6 +49,9 @@ bool hvm_io_pending(struct vcpu *v)
     struct domain *d = v->domain;
     struct hvm_ioreq_server *s;
 
+    if ( has_vpci(d) && vpci_process_pending(v) )
+        return true;
+
     list_for_each_entry ( s,
                           &d->arch.hvm_domain.ioreq_server.list,
                           list_entry )
diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile
index 840a906470..241467212f 100644
--- a/xen/drivers/vpci/Makefile
+++ b/xen/drivers/vpci/Makefile
@@ -1 +1 @@
-obj-y += vpci.o
+obj-y += vpci.o header.o
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
new file mode 100644
index 0000000000..25d8ec0507
--- /dev/null
+++ b/xen/drivers/vpci/header.c
@@ -0,0 +1,545 @@
+/*
+ * Generic functionality for handling accesses to the PCI header from the
+ * configuration space.
+ *
+ * Copyright (C) 2017 Citrix Systems R&D
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/p2m-common.h>
+#include <xen/sched.h>
+#include <xen/softirq.h>
+#include <xen/vpci.h>
+
+#include <asm/event.h>
+
+#define MAPPABLE_BAR(x)                                                 \
+    ((x)->type == VPCI_BAR_MEM32 || (x)->type == VPCI_BAR_MEM64_LO ||   \
+     (x)->type == VPCI_BAR_ROM)
+
+struct map_data {
+    struct domain *d;
+    bool map;
+};
+
+static int map_range(unsigned long s, unsigned long e, void *data,
+                     unsigned long *c)
+{
+    const struct map_data *map = data;
+    int rc;
+
+    for ( ; ; )
+    {
+        unsigned long size = e - s + 1;
+
+        /*
+         * ARM TODOs:
+         * - On ARM whether the memory is prefetchable or not should be passed
+         *   to map_mmio_regions in order to decide which memory attributes
+         *   should be used.
+         *
+         * - {un}map_mmio_regions doesn't support preemption.
+         */
+
+        rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(s))
+                      : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(s));
+        if ( rc == 0 )
+        {
+            *c += size;
+            break;
+        }
+        if ( rc < 0 )
+        {
+            printk(XENLOG_G_WARNING
+                   "Failed to identity %smap [%lx, %lx] for d%d: %d\n",
+                   map ? "" : "un", s, e, map->d->domain_id, rc);
+            break;
+        }
+        ASSERT(rc < size);
+        *c += rc;
+        s += rc;
+        if ( general_preempt_check() )
+                return -ERESTART;
+    }
+
+    return rc;
+}
+
+/*
+ * The rom_only parameter is used to signal the map/unmap helpers that the ROM
+ * BAR's enable bit has changed with the memory decoding bit already enabled.
+ * If rom_only is not set then it's the memory decoding bit that changed.
+ */
+static void modify_decoding(const struct pci_dev *pdev, bool map, bool 
rom_only)
+{
+    struct vpci_header *header = &pdev->vpci->header;
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    uint16_t cmd;
+    unsigned int i;
+
+    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
+    {
+        if ( !MAPPABLE_BAR(&header->bars[i]) )
+            continue;
+
+        if ( rom_only && header->bars[i].type == VPCI_BAR_ROM )
+        {
+            unsigned int rom_pos = (i == PCI_HEADER_NORMAL_NR_BARS)
+                                   ? PCI_ROM_ADDRESS : PCI_ROM_ADDRESS1;
+            uint32_t val = header->bars[i].addr |
+                           (map ? PCI_ROM_ADDRESS_ENABLE : 0);
+
+            header->bars[i].enabled = header->rom_enabled = map;
+            pci_conf_write32(pdev->seg, pdev->bus, slot, func, rom_pos, val);
+            return;
+        }
+
+        if ( !rom_only &&
+             (header->bars[i].type != VPCI_BAR_ROM || header->rom_enabled) )
+            header->bars[i].enabled = map;
+    }
+
+    ASSERT(!rom_only);
+    cmd = pci_conf_read16(pdev->seg, pdev->bus, slot, func, PCI_COMMAND);
+    cmd &= ~PCI_COMMAND_MEMORY;
+    cmd |= map ? PCI_COMMAND_MEMORY : 0;
+    pci_conf_write16(pdev->seg, pdev->bus, slot, func, PCI_COMMAND,
+                     cmd);
+}
+
+bool vpci_process_pending(struct vcpu *v)
+{
+    if ( v->vpci.mem )
+    {
+        struct map_data data = {
+            .d = v->domain,
+            .map = v->vpci.map,
+        };
+        int rc = rangeset_consume_ranges(v->vpci.mem, map_range, &data);
+
+        if ( rc == -ERESTART )
+            return true;
+
+        spin_lock(&v->vpci.pdev->vpci->lock);
+        /* Disable memory decoding unconditionally on failure. */
+        modify_decoding(v->vpci.pdev, !rc && v->vpci.map,
+                        !rc && v->vpci.rom_only);
+        spin_unlock(&v->vpci.pdev->vpci->lock);
+
+        rangeset_destroy(v->vpci.mem);
+        v->vpci.mem = NULL;
+        if ( rc )
+            /*
+             * FIXME: in case of failure remove the device from the domain.
+             * Note that there might still be leftover mappings. While this is
+             * safe for Dom0, for DomUs the domain will likely need to be
+             * killed in order to avoid leaking stale p2m mappings on
+             * failure.
+             */
+            vpci_remove_device(v->vpci.pdev);
+    }
+
+    return false;
+}
+
+static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
+                            struct rangeset *mem)
+{
+    struct map_data data = { .d = d, .map = true };
+    int rc;
+
+    while ( (rc = rangeset_consume_ranges(mem, map_range, &data)) == -ERESTART 
)
+        process_pending_softirqs();
+    rangeset_destroy(mem);
+    if ( !rc )
+        modify_decoding(pdev, true, false);
+
+    return rc;
+}
+
+static void defer_map(struct domain *d, struct pci_dev *pdev,
+                      struct rangeset *mem, bool map, bool rom_only)
+{
+    struct vcpu *curr = current;
+
+    /*
+     * FIXME: when deferring the {un}map the state of the device should not
+     * be trusted. For example the enable bit is toggled after the device
+     * is mapped. This can lead to parallel mapping operations being
+     * started for the same device if the domain is not well-behaved.
+     */
+    curr->vpci.pdev = pdev;
+    curr->vpci.mem = mem;
+    curr->vpci.map = map;
+    curr->vpci.rom_only = rom_only;
+}
+
+static int modify_bars(const struct pci_dev *pdev, bool map, bool rom_only)
+{
+    struct vpci_header *header = &pdev->vpci->header;
+    struct rangeset *mem = rangeset_new(NULL, NULL, 0);
+    struct pci_dev *tmp, *dev = NULL;
+    unsigned int i;
+    int rc;
+
+    if ( !mem )
+        return -ENOMEM;
+
+    /*
+     * Create a rangeset that represents the current device BARs memory region
+     * and compare it against all the currently active BAR memory regions. If
+     * an overlap is found, subtract it from the region to be mapped/unmapped.
+     *
+     * First fill the rangeset with all the BARs of this device or with the ROM
+     * BAR only, depending on whether the guest is toggling the memory decode
+     * bit of the command register, or the enable bit of the ROM BAR register.
+     */
+    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
+    {
+        const struct vpci_bar *bar = &header->bars[i];
+        unsigned long start = PFN_DOWN(bar->addr);
+        unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
+
+        if ( !MAPPABLE_BAR(bar) ||
+             (rom_only ? bar->type != VPCI_BAR_ROM
+                       : (bar->type == VPCI_BAR_ROM && !header->rom_enabled)) )
+            continue;
+
+        rc = rangeset_add_range(mem, start, end);
+        if ( rc )
+        {
+            printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
+                   start, end, rc);
+            rangeset_destroy(mem);
+            return rc;
+        }
+    }
+
+    /*
+     * Check for overlaps with other BARs. Note that only BARs that are
+     * currently mapped (enabled) are checked for overlaps.
+     */
+    list_for_each_entry(tmp, &pdev->domain->arch.pdev_list, domain_list)
+    {
+        if ( tmp == pdev )
+        {
+            /*
+             * Need to store the device so it's not constified and defer_map
+             * can modify it in case of error.
+             */
+            dev = tmp;
+            if ( !rom_only )
+                /*
+                 * If memory decoding is toggled avoid checking against the
+                 * same device, or else all regions will be removed from the
+                 * memory map in the unmap case.
+                 */
+                continue;
+        }
+
+        for ( i = 0; i < ARRAY_SIZE(tmp->vpci->header.bars); i++ )
+        {
+            const struct vpci_bar *bar = &tmp->vpci->header.bars[i];
+            unsigned long start = PFN_DOWN(bar->addr);
+            unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
+
+            if ( !bar->enabled || !rangeset_overlaps_range(mem, start, end) ||
+                 /*
+                  * If only the ROM enable bit is toggled check against other
+                  * BARs in the same device for overlaps, but not against the
+                  * same ROM BAR.
+                  */
+                 (rom_only && tmp == pdev && bar->type == VPCI_BAR_ROM) )
+                continue;
+
+            rc = rangeset_remove_range(mem, start, end);
+            if ( rc )
+            {
+                printk(XENLOG_G_WARNING "Failed to remove [%lx, %lx]: %d\n",
+                       start, end, rc);
+                rangeset_destroy(mem);
+                return rc;
+            }
+        }
+    }
+
+    ASSERT(dev);
+
+    if ( system_state < SYS_STATE_active )
+    {
+        /*
+         * Mappings might be created when building Dom0 if the memory decoding
+         * bit of PCI devices is enabled. In that case it's not possible to
+         * defer the operation, so call apply_map in order to create the
+         * mappings right away. Note that at build time this function will only
+         * be called iff the memory decoding bit is enabled, thus the operation
+         * will always be to establish mappings and process all the BARs.
+         */
+        ASSERT(map && !rom_only);
+        return apply_map(pdev->domain, pdev, mem);
+    }
+
+    defer_map(dev->domain, dev, mem, map, rom_only);
+
+    return 0;
+}
+
+static void cmd_write(const struct pci_dev *pdev, unsigned int reg,
+                      uint32_t cmd, void *data)
+{
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    uint16_t current_cmd = pci_conf_read16(pdev->seg, pdev->bus, slot, func,
+                                           reg);
+
+    /*
+     * Let Dom0 play with all the bits directly except for the memory
+     * decoding one.
+     */
+    if ( (cmd ^ current_cmd) & PCI_COMMAND_MEMORY )
+        /*
+         * Ignore the error. No memory has been added or removed from the p2m
+         * (because the actual p2m changes are deferred in defer_map) and the
+         * memory decoding bit has not been changed, so leave everything as-is,
+         * hoping the guest will realize and try again.
+         */
+        modify_bars(pdev, cmd & PCI_COMMAND_MEMORY, false);
+    else
+        pci_conf_write16(pdev->seg, pdev->bus, slot, func, reg, cmd);
+}
+
+static void bar_write(const struct pci_dev *pdev, unsigned int reg,
+                      uint32_t val, void *data)
+{
+    struct vpci_bar *bar = data;
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    bool hi = false;
+
+    if ( pci_conf_read16(pdev->seg, pdev->bus, slot, func, PCI_COMMAND) &
+         PCI_COMMAND_MEMORY )
+    {
+        gprintk(XENLOG_WARNING,
+                "%04x:%02x:%02x.%u: ignored BAR %lu write with memory decoding 
enabled\n",
+                pdev->seg, pdev->bus, slot, func,
+                bar - pdev->vpci->header.bars);
+        return;
+    }
+
+    if ( bar->type == VPCI_BAR_MEM64_HI )
+    {
+        ASSERT(reg > PCI_BASE_ADDRESS_0);
+        bar--;
+        hi = true;
+    }
+    else
+        val &= PCI_BASE_ADDRESS_MEM_MASK;
+
+    /*
+     * Update the cached address, so that when memory decoding is enabled
+     * Xen can map the BAR into the guest p2m.
+     */
+    bar->addr &= ~(0xffffffffull << (hi ? 32 : 0));
+    bar->addr |= (uint64_t)val << (hi ? 32 : 0);
+
+    /* Make sure Xen writes back the same value for the BAR RO bits. */
+    if ( !hi )
+    {
+        val |= bar->type == VPCI_BAR_MEM32 ? PCI_BASE_ADDRESS_MEM_TYPE_32
+                                           : PCI_BASE_ADDRESS_MEM_TYPE_64;
+        val |= bar->prefetchable ? PCI_BASE_ADDRESS_MEM_PREFETCH : 0;
+    }
+
+    pci_conf_write32(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                     PCI_FUNC(pdev->devfn), reg, val);
+}
+
+static void rom_write(const struct pci_dev *pdev, unsigned int reg,
+                      uint32_t val, void *data)
+{
+    struct vpci_header *header = &pdev->vpci->header;
+    struct vpci_bar *rom = data;
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    uint16_t cmd = pci_conf_read16(pdev->seg, pdev->bus, slot, func,
+                                   PCI_COMMAND);
+    bool new_enabled = val & PCI_ROM_ADDRESS_ENABLE;
+
+    if ( (cmd & PCI_COMMAND_MEMORY) && header->rom_enabled && new_enabled )
+    {
+        gprintk(XENLOG_WARNING,
+                "%04x:%02x:%02x.%u: ignored ROM BAR write with memory decoding 
enabled\n",
+                pdev->seg, pdev->bus, slot, func);
+        return;
+    }
+
+    if ( !header->rom_enabled )
+        /*
+         * If the ROM BAR is not enabled update the address field so the
+         * correct address is mapped into the p2m.
+         */
+        rom->addr = val & PCI_ROM_ADDRESS_MASK;
+
+    if ( !(cmd & PCI_COMMAND_MEMORY) || header->rom_enabled == new_enabled )
+    {
+        /* Just update the ROM BAR field. */
+        header->rom_enabled = new_enabled;
+        pci_conf_write32(pdev->seg, pdev->bus, slot, func, reg, val);
+    }
+    else if ( modify_bars(pdev, new_enabled, true) )
+        /*
+         * No memory has been added or removed from the p2m (because the actual
+         * p2m changes are deferred in defer_map) and the ROM enable bit has
+         * not been changed, so leave everything as-is, hoping the guest will
+         * realize and try again. It's important to not update rom->addr in the
+         * unmap case if modify_bars has failed, or future attempts would
+         * attempt to unmap the wrong address.
+         */
+        return;
+
+    if ( !new_enabled )
+        rom->addr = val & PCI_ROM_ADDRESS_MASK;
+}
+
+static int init_bars(struct pci_dev *pdev)
+{
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    uint16_t cmd;
+    uint64_t addr, size;
+    unsigned int i, num_bars, rom_reg;
+    struct vpci_header *header = &pdev->vpci->header;
+    struct vpci_bar *bars = header->bars;
+    pci_sbdf_t sbdf = {
+        .sbdf = PCI_SBDF3(pdev->seg, pdev->bus, pdev->devfn),
+    };
+    int rc;
+
+    switch ( pci_conf_read8(pdev->seg, pdev->bus, slot, func, PCI_HEADER_TYPE)
+             & 0x7f )
+    {
+    case PCI_HEADER_TYPE_NORMAL:
+        num_bars = PCI_HEADER_NORMAL_NR_BARS;
+        rom_reg = PCI_ROM_ADDRESS;
+        break;
+
+    case PCI_HEADER_TYPE_BRIDGE:
+        num_bars = PCI_HEADER_BRIDGE_NR_BARS;
+        rom_reg = PCI_ROM_ADDRESS1;
+        break;
+
+    default:
+        return -EOPNOTSUPP;
+    }
+
+    /* Setup a handler for the command register. */
+    rc = vpci_add_register(pdev->vpci, vpci_hw_read16, cmd_write, PCI_COMMAND,
+                           2, header);
+    if ( rc )
+        return rc;
+
+    /* Disable memory decoding before sizing. */
+    cmd = pci_conf_read16(pdev->seg, pdev->bus, slot, func, PCI_COMMAND);
+    if ( cmd & PCI_COMMAND_MEMORY )
+        pci_conf_write16(pdev->seg, pdev->bus, slot, func, PCI_COMMAND,
+                         cmd & ~PCI_COMMAND_MEMORY);
+
+    for ( i = 0; i < num_bars; i++ )
+    {
+        uint8_t reg = PCI_BASE_ADDRESS_0 + i * 4;
+        uint32_t val;
+
+        if ( i && bars[i - 1].type == VPCI_BAR_MEM64_LO )
+        {
+            bars[i].type = VPCI_BAR_MEM64_HI;
+            rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg,
+                                   4, &bars[i]);
+            if ( rc )
+            {
+                pci_conf_write16(pdev->seg, pdev->bus, slot, func,
+                                 PCI_COMMAND, cmd);
+                return rc;
+            }
+
+            continue;
+        }
+
+        val = pci_conf_read32(pdev->seg, pdev->bus, slot, func, reg);
+        if ( (val & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
+        {
+            bars[i].type = VPCI_BAR_IO;
+            continue;
+        }
+        if ( (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+             PCI_BASE_ADDRESS_MEM_TYPE_64 )
+            bars[i].type = VPCI_BAR_MEM64_LO;
+        else
+            bars[i].type = VPCI_BAR_MEM32;
+
+        rc = pci_size_mem_bar(sbdf, reg, &addr, &size,
+                              (i == num_bars - 1) ? PCI_BAR_LAST : 0);
+        if ( rc < 0 )
+        {
+            pci_conf_write16(pdev->seg, pdev->bus, slot, func, PCI_COMMAND,
+                             cmd);
+            return rc;
+        }
+
+        if ( size == 0 )
+        {
+            bars[i].type = VPCI_BAR_EMPTY;
+            continue;
+        }
+
+        bars[i].addr = addr;
+        bars[i].size = size;
+        bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, bar_write, reg, 4,
+                               &bars[i]);
+        if ( rc )
+        {
+            pci_conf_write16(pdev->seg, pdev->bus, slot, func, PCI_COMMAND,
+                             cmd);
+            return rc;
+        }
+    }
+
+    /* Check expansion ROM. */
+    rc = pci_size_mem_bar(sbdf, rom_reg, &addr, &size, PCI_BAR_ROM);
+    if ( rc > 0 && size )
+    {
+        struct vpci_bar *rom = &header->bars[num_bars];
+
+        rom->type = VPCI_BAR_ROM;
+        rom->size = size;
+        rom->addr = addr;
+        header->rom_enabled = pci_conf_read32(pdev->seg, pdev->bus, slot, func,
+                                              rom_reg) & 
PCI_ROM_ADDRESS_ENABLE;
+
+        rc = vpci_add_register(pdev->vpci, vpci_hw_read32, rom_write, rom_reg,
+                               4, rom);
+        if ( rc )
+            rom->type = VPCI_BAR_EMPTY;
+    }
+
+    return (cmd & PCI_COMMAND_MEMORY) ? modify_bars(pdev, true, false) : 0;
+}
+REGISTER_VPCI_INIT(init_bars);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 4740d02edf..e5b49b9d82 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -34,6 +34,23 @@ struct vpci_register {
     struct list_head node;
 };
 
+void vpci_remove_device(struct pci_dev *pdev)
+{
+    spin_lock(&pdev->vpci->lock);
+    while ( !list_empty(&pdev->vpci->handlers) )
+    {
+        struct vpci_register *r = list_first_entry(&pdev->vpci->handlers,
+                                                   struct vpci_register,
+                                                   node);
+
+        list_del(&r->node);
+        xfree(r);
+    }
+    spin_unlock(&pdev->vpci->lock);
+    xfree(pdev->vpci);
+    pdev->vpci = NULL;
+}
+
 int __hwdom_init vpci_add_handlers(struct pci_dev *pdev)
 {
     unsigned int i;
@@ -57,19 +74,7 @@ int __hwdom_init vpci_add_handlers(struct pci_dev *pdev)
     }
 
     if ( rc )
-    {
-        while ( !list_empty(&pdev->vpci->handlers) )
-        {
-            struct vpci_register *r = list_first_entry(&pdev->vpci->handlers,
-                                                       struct vpci_register,
-                                                       node);
-
-            list_del(&r->node);
-            xfree(r);
-        }
-        xfree(pdev->vpci);
-        pdev->vpci = NULL;
-    }
+        vpci_remove_device(pdev);
 
     return rc;
 }
@@ -102,6 +107,20 @@ static void vpci_ignored_write(const struct pci_dev *pdev, 
unsigned int reg,
 {
 }
 
+uint32_t vpci_hw_read16(const struct pci_dev *pdev, unsigned int reg,
+                        void *data)
+{
+    return pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                           PCI_FUNC(pdev->devfn), reg);
+}
+
+uint32_t vpci_hw_read32(const struct pci_dev *pdev, unsigned int reg,
+                        void *data)
+{
+    return pci_conf_read32(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                           PCI_FUNC(pdev->devfn), reg);
+}
+
 int vpci_add_register(struct vpci *vpci, vpci_read_t *read_handler,
                       vpci_write_t *write_handler, unsigned int offset,
                       unsigned int size, void *data)
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 3303fd9803..3aeb6eea77 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -20,6 +20,7 @@
 #include <xen/smp.h>
 #include <xen/perfc.h>
 #include <asm/atomic.h>
+#include <xen/vpci.h>
 #include <xen/wait.h>
 #include <public/xen.h>
 #include <public/domctl.h>
@@ -267,6 +268,9 @@ struct vcpu
 
     struct evtchn_fifo_vcpu *evtchn_fifo;
 
+    /* vPCI per-vCPU area, used to store data for long running operations. */
+    struct vpci_vcpu vpci;
+
     struct arch_vcpu arch;
 };
 
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 9f2864fb0c..6bf8b22b4f 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -1,6 +1,8 @@
 #ifndef _XEN_VPCI_H_
 #define _XEN_VPCI_H_
 
+#ifdef CONFIG_HAS_VPCI
+
 #include <xen/pci.h>
 #include <xen/types.h>
 #include <xen/list.h>
@@ -20,6 +22,9 @@ typedef int vpci_register_init_t(struct pci_dev *dev);
 /* Add vPCI handlers to device. */
 int __must_check vpci_add_handlers(struct pci_dev *dev);
 
+/* Remove all handlers and free vpci related structures. */
+void vpci_remove_device(struct pci_dev *pdev);
+
 /* Add/remove a register handler. */
 int __must_check vpci_add_register(struct vpci *vpci,
                                    vpci_read_t *read_handler,
@@ -34,12 +39,68 @@ uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, 
unsigned int size);
 void vpci_write(pci_sbdf_t sbdf, unsigned int reg, unsigned int size,
                 uint32_t data);
 
+/* Passthrough handlers. */
+uint32_t vpci_hw_read16(const struct pci_dev *pdev, unsigned int reg,
+                        void *data);
+uint32_t vpci_hw_read32(const struct pci_dev *pdev, unsigned int reg,
+                        void *data);
+
+/*
+ * Check for pending vPCI operations on this vcpu. Returns true if the vcpu
+ * should not run.
+ */
+bool __must_check vpci_process_pending(struct vcpu *v);
+
 struct vpci {
     /* List of vPCI handlers for a device. */
     struct list_head handlers;
     spinlock_t lock;
+
+#ifdef __XEN__
+    /* Hide the rest of the vpci struct from the user-space test harness. */
+    struct vpci_header {
+        /* Information about the PCI BARs of this device. */
+        struct vpci_bar {
+            uint64_t addr;
+            uint64_t size;
+            enum {
+                VPCI_BAR_EMPTY,
+                VPCI_BAR_IO,
+                VPCI_BAR_MEM32,
+                VPCI_BAR_MEM64_LO,
+                VPCI_BAR_MEM64_HI,
+                VPCI_BAR_ROM,
+            } type;
+            bool prefetchable : 1;
+            /* Store whether the BAR is mapped into guest p2m. */
+            bool enabled      : 1;
+#define PCI_HEADER_NORMAL_NR_BARS        6
+#define PCI_HEADER_BRIDGE_NR_BARS        2
+        } bars[PCI_HEADER_NORMAL_NR_BARS + 1];
+        /* At most 6 BARS + 1 expansion ROM BAR. */
+
+        /*
+         * Store whether the ROM enable bit is set (doesn't imply ROM BAR
+         * is mapped into guest p2m) if there's a ROM BAR on the device.
+         */
+        bool rom_enabled      : 1;
+        /* FIXME: currently there's no support for SR-IOV. */
+    } header;
+#endif
+};
+
+struct vpci_vcpu {
+    /* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
+    struct rangeset *mem;
+    struct pci_dev *pdev;
+    bool map      : 1;
+    bool rom_only : 1;
 };
 
+#else /* !CONFIG_HAS_VPCI */
+struct vpci_vcpu {};
+#endif
+
 #endif
 
 /*
--
generated by git-patchbot for /home/xen/git/xen.git#master

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.