[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH RFC 11/12] xen/x86: allow a PVHv2 Dom0 to register PCI devices with Xen



This patch allows a PVHv2 Dom0 to register PCI devices with Xen using
PHYSDEVOP_pci_mmcfg_reserved, PHYSDEVOP_pci_device_add and
PHYSDEVOP_pci_device_remove. The functionality of the pci_device_add
function has been expanded so that for PVHv2 Dom0 it also sizes the PCI
device BARs and adds them to the Dom0 memory map using a 1:1 mapping.

Signed-off-by: Roger Pau Monne <roger.pau@xxxxxxxxxx>
---
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
This is incomplete, devices with SR-IOV BARs are not going to be properly
mapped, pci_remove_device will not unmap the BARs, and if the Dom0 OS
changes the position of the BARs the 1:1 mappings are not going to be
updated, so Dom0 is going to lose access to them...
---
 xen/arch/x86/hvm/hvm.c        |   6 ++
 xen/drivers/passthrough/pci.c | 153 +++++++++++++++++++++++++++++++++---------
 2 files changed, 127 insertions(+), 32 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index db4b2d6..9434540 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4029,6 +4029,12 @@ static long hvm_physdev_op(int cmd, 
XEN_GUEST_HANDLE_PARAM(void) arg)
         if ( !is_pvh_vcpu(current) || !is_hardware_domain(current->domain) )
             return -ENOSYS;
         /* fall through */
+    case PHYSDEVOP_pci_mmcfg_reserved:
+    case PHYSDEVOP_pci_device_add:
+    case PHYSDEVOP_pci_device_remove:
+        if ( !is_hardware_domain(current->domain) )
+            return -ENOSYS;
+        /* fall through */
     case PHYSDEVOP_map_pirq:
     case PHYSDEVOP_unmap_pirq:
     case PHYSDEVOP_eoi:
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index e3595a9..18687e0 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -587,6 +587,52 @@ static void pci_enable_acs(struct pci_dev *pdev)
     pci_conf_write16(seg, bus, dev, func, pos + PCI_ACS_CTRL, ctrl);
 }
 
+static int pci_size_bar(unsigned int seg, unsigned int bus, unsigned int slot,
+                        unsigned int func, unsigned int base,
+                        unsigned int max_bars, unsigned int *index,
+                        uint64_t *addr, uint64_t *size)
+{
+    unsigned int idx = base + *index * 4;
+    u32 bar = pci_conf_read32(seg, bus, slot, func, idx);
+    u32 hi = 0;
+
+    *addr = *size = 0;
+
+    ASSERT((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_MEMORY);
+    pci_conf_write32(seg, bus, slot, func, idx, ~0);
+    if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+         PCI_BASE_ADDRESS_MEM_TYPE_64 )
+    {
+        if ( *index >= max_bars )
+        {
+            printk(XENLOG_WARNING
+                   "device %04x:%02x:%02x.%u with 64-bit BAR in last slot\n",
+                   seg, bus, slot, func);
+            return -EINVAL;
+        }
+        hi = pci_conf_read32(seg, bus, slot, func, idx + 4);
+        pci_conf_write32(seg, bus, slot, func, idx + 4, ~0);
+    }
+    *size = pci_conf_read32(seg, bus, slot, func, idx) &
+            PCI_BASE_ADDRESS_MEM_MASK;
+    if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+         PCI_BASE_ADDRESS_MEM_TYPE_64 )
+    {
+        *size |= (u64)pci_conf_read32(seg, bus, slot, func, idx + 4) << 32;
+        pci_conf_write32(seg, bus, slot, func, idx + 4, hi);
+    }
+    else if ( *size )
+        *size |= (u64)~0 << 32;
+    pci_conf_write32(seg, bus, slot, func, idx, bar);
+    *size = - *size;
+    *addr = (bar & PCI_BASE_ADDRESS_MEM_MASK) | ((u64)hi << 32);
+    if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+         PCI_BASE_ADDRESS_MEM_TYPE_64 )
+        ++*index;
+
+    return 0;
+}
+
 int pci_add_device(u16 seg, u8 bus, u8 devfn,
                    const struct pci_dev_info *info, nodeid_t node)
 {
@@ -651,7 +697,7 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
             {
                 unsigned int idx = pos + PCI_SRIOV_BAR + i * 4;
                 u32 bar = pci_conf_read32(seg, bus, slot, func, idx);
-                u32 hi = 0;
+                uint64_t addr;
 
                 if ( (bar & PCI_BASE_ADDRESS_SPACE) ==
                      PCI_BASE_ADDRESS_SPACE_IO )
@@ -662,38 +708,13 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
                            seg, bus, slot, func, i);
                     continue;
                 }
-                pci_conf_write32(seg, bus, slot, func, idx, ~0);
-                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
-                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
-                {
-                    if ( i >= PCI_SRIOV_NUM_BARS )
-                    {
+                ret = pci_size_bar(seg, bus, slot, func, pos + PCI_SRIOV_BAR,
+                                   PCI_SRIOV_NUM_BARS, &i, &addr,
+                                   &pdev->vf_rlen[i]);
+                if ( ret )
                         printk(XENLOG_WARNING
-                               "SR-IOV device %04x:%02x:%02x.%u with 64-bit"
-                               " vf BAR in last slot\n",
-                               seg, bus, slot, func);
-                        break;
-                    }
-                    hi = pci_conf_read32(seg, bus, slot, func, idx + 4);
-                    pci_conf_write32(seg, bus, slot, func, idx + 4, ~0);
-                }
-                pdev->vf_rlen[i] = pci_conf_read32(seg, bus, slot, func, idx) &
-                                   PCI_BASE_ADDRESS_MEM_MASK;
-                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
-                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
-                {
-                    pdev->vf_rlen[i] |= (u64)pci_conf_read32(seg, bus,
-                                                             slot, func,
-                                                             idx + 4) << 32;
-                    pci_conf_write32(seg, bus, slot, func, idx + 4, hi);
-                }
-                else if ( pdev->vf_rlen[i] )
-                    pdev->vf_rlen[i] |= (u64)~0 << 32;
-                pci_conf_write32(seg, bus, slot, func, idx, bar);
-                pdev->vf_rlen[i] = -pdev->vf_rlen[i];
-                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
-                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
-                    ++i;
+                    "failed to size BAR%u of SR-IOV device %04x:%02x:%02x\n",
+                    i, seg, bus, slot, func);
             }
         }
         else
@@ -723,6 +744,74 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
 
     pci_enable_acs(pdev);
 
+    if ( is_hvm_domain(current->domain) )
+    {
+        unsigned int i, num_bars;
+        u8 htype;
+
+        ASSERT(is_hardware_domain(current->domain));
+
+        htype = pci_conf_read8(seg, bus, slot, func, PCI_HEADER_TYPE);
+
+        switch ( htype )
+        {
+            case PCI_HEADER_TYPE_NORMAL:
+                num_bars = 6;
+                break;
+            case PCI_HEADER_TYPE_BRIDGE:
+                num_bars = 2;
+                break;
+            default:
+                printk(XENLOG_WARNING
+                       "PCI device %04x:%02x:%02x.%u type %u not supported\n",
+                       seg, bus, slot, func, htype);
+                num_bars = 0;
+                break;
+        }
+
+        for ( i = 0; i < num_bars; i++ )
+        {
+            u32 bar, nr_pages;
+            u64 addr, size, bar_pfn;
+
+            bar = pci_conf_read32(seg, bus, slot, func,
+                                  PCI_BASE_ADDRESS_0 + i * 4);
+
+            if ( (bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
+            {
+                printk(XENLOG_DEBUG "PCI %04x:%02x:%02x.%u: found IO BAR#%u\n",
+                       seg, bus, slot, func, i);
+                continue;
+            }
+
+            ret = pci_size_bar(seg, bus, slot, func, PCI_BASE_ADDRESS_0,
+                               num_bars, &i, &addr, &size);
+            if ( ret )
+            {
+                printk(XENLOG_WARNING
+                       "failed to size BAR%u of device %04x:%02x:%02x.%u\n",
+                       i, seg, bus, slot, func);
+                continue;
+            }
+
+            printk(XENLOG_DEBUG
+                "PCI %04x:%02x:%02x.%u: adding BAR#%u addr %#lx size %#lx\n",
+                seg, bus, slot, func, i, addr, size);
+
+            nr_pages = DIV_ROUND_UP(size, PAGE_SIZE);
+            bar_pfn = PFN_DOWN(addr);
+            ret = map_mmio_regions(current->domain, _gfn(bar_pfn), nr_pages,
+                                   _mfn(bar_pfn));
+            if ( ret )
+            {
+                printk(XENLOG_WARNING
+                    "Failed to map %#lx - %#lx into Dom0 memory map: %d\n",
+                    bar_pfn, bar_pfn + nr_pages, ret);
+                return ret;
+            }
+        }
+    }
+
 out:
     pcidevs_unlock();
     if ( !ret )
-- 
2.7.4 (Apple Git-66)


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.