[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH, RFC 6/7] IOMMU: add phantom function support


  • To: "xen-devel" <xen-devel@xxxxxxxxxxxxx>
  • From: "Jan Beulich" <JBeulich@xxxxxxxx>
  • Date: Wed, 28 Nov 2012 09:52:32 +0000
  • Delivery-date: Wed, 28 Nov 2012 09:51:52 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xen.org>

Apart from generating device context entries for the base function,
all phantom functions also need context entries to be generated for
them.

In order to distinguish different use cases, a variant of
pci_get_pdev() is being introduced that, even when passed a phantom
function number, would return the underlying actual device.

--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -339,7 +339,15 @@ static void amd_iommu_flush_all_iotlbs(s
         return;
 
     for_each_pdev( d, pdev )
-        amd_iommu_flush_iotlb(pdev->devfn, pdev, gaddr, order);
+    {
+        u8 devfn = pdev->devfn;
+
+        do {
+            amd_iommu_flush_iotlb(devfn, pdev, gaddr, order);
+            devfn += pdev->phantom_stride;
+        } while ( devfn != pdev->devfn &&
+                  PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
+    }
 }
 
 /* Flush iommu cache after p2m changes. */
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -667,7 +667,7 @@ void parse_ppr_log_entry(struct amd_iomm
     devfn = PCI_DEVFN2(device_id);
 
     spin_lock(&pcidevs_lock);
-    pdev = pci_get_pdev(iommu->seg, bus, devfn);
+    pdev = pci_get_real_pdev(iommu->seg, bus, devfn);
     spin_unlock(&pcidevs_lock);
 
     if ( pdev )
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -598,7 +598,6 @@ static int update_paging_mode(struct dom
         for_each_pdev( d, pdev )
         {
             bdf = PCI_BDF2(pdev->bus, pdev->devfn);
-            req_id = get_dma_requestor_id(pdev->seg, bdf);
             iommu = find_iommu_for_device(pdev->seg, bdf);
             if ( !iommu )
             {
@@ -607,16 +606,21 @@ static int update_paging_mode(struct dom
             }
 
             spin_lock_irqsave(&iommu->lock, flags);
-            device_entry = iommu->dev_table.buffer +
-                           (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
-
-            /* valid = 0 only works for dom0 passthrough mode */
-            amd_iommu_set_root_page_table((u32 *)device_entry,
-                                          page_to_maddr(hd->root_table),
-                                          hd->domain_id,
-                                          hd->paging_mode, 1);
-
-            amd_iommu_flush_device(iommu, req_id);
+            do {
+                req_id = get_dma_requestor_id(pdev->seg, bdf);
+                device_entry = iommu->dev_table.buffer +
+                               (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+
+                /* valid = 0 only works for dom0 passthrough mode */
+                amd_iommu_set_root_page_table((u32 *)device_entry,
+                                              page_to_maddr(hd->root_table),
+                                              hd->domain_id,
+                                              hd->paging_mode, 1);
+
+                amd_iommu_flush_device(iommu, req_id);
+                bdf += pdev->phantom_stride;
+            } while ( PCI_DEVFN2(bdf) != pdev->devfn &&
+                      PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
             spin_unlock_irqrestore(&iommu->lock, flags);
         }
 
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -154,6 +154,8 @@ void __init iommu_dom0_init(struct domai
 int iommu_add_device(struct pci_dev *pdev)
 {
     struct hvm_iommu *hd;
+    int rc;
+    u8 devfn;
 
     if ( !pdev->domain )
         return -EINVAL;
@@ -164,7 +166,20 @@ int iommu_add_device(struct pci_dev *pde
     if ( !iommu_enabled || !hd->platform_ops )
         return 0;
 
-    return hd->platform_ops->add_device(pdev->devfn, pdev);
+    rc = hd->platform_ops->add_device(pdev->devfn, pdev);
+    if ( rc || !pdev->phantom_stride )
+        return rc;
+
+    for ( devfn = pdev->devfn ; ; )
+    {
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            return 0;
+        rc = hd->platform_ops->add_device(devfn, pdev);
+        if ( rc )
+            printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
+                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+    }
 }
 
 int iommu_enable_device(struct pci_dev *pdev)
@@ -187,6 +202,8 @@ int iommu_enable_device(struct pci_dev *
 int iommu_remove_device(struct pci_dev *pdev)
 {
     struct hvm_iommu *hd;
+    u8 devfn;
+
     if ( !pdev->domain )
         return -EINVAL;
 
@@ -194,6 +211,22 @@ int iommu_remove_device(struct pci_dev *
     if ( !iommu_enabled || !hd->platform_ops )
         return 0;
 
+    for ( devfn = pdev->devfn ; pdev->phantom_stride; )
+    {
+        int rc;
+
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            break;
+        rc = hd->platform_ops->remove_device(devfn, pdev);
+        if ( !rc )
+            continue;
+
+        printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
+               pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+        return rc;
+    }
+
     return hd->platform_ops->remove_device(pdev->devfn, pdev);
 }
 
@@ -241,6 +274,18 @@ static int assign_device(struct domain *
     if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
         goto done;
 
+    for ( ; pdev->phantom_stride; rc = 0 )
+    {
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            break;
+        rc = hd->platform_ops->assign_device(d, devfn, pdev);
+        if ( rc )
+            printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n",
+                   d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                   rc);
+    }
+
     if ( has_arch_pdevs(d) && !need_iommu(d) )
     {
         d->need_iommu = 1;
@@ -373,6 +418,21 @@ int deassign_device(struct domain *d, u1
     if ( !pdev )
         return -ENODEV;
 
+    while ( pdev->phantom_stride )
+    {
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            break;
+        ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
+        if ( !ret )
+            continue;
+
+        printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n",
+               d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
+        return ret;
+    }
+
+    devfn = pdev->devfn;
     ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
     if ( ret )
     {
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -144,6 +144,8 @@ static struct pci_dev *alloc_pdev(struct
     /* update bus2bridge */
     switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
     {
+        int pos;
+        u16 cap;
         u8 sec_bus, sub_bus;
 
         case DEV_TYPE_PCIe_BRIDGE:
@@ -167,6 +169,20 @@ static struct pci_dev *alloc_pdev(struct
             break;
 
         case DEV_TYPE_PCIe_ENDPOINT:
+            pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn),
+                                      PCI_FUNC(devfn), PCI_CAP_ID_EXP);
+            BUG_ON(!pos);
+            cap = pci_conf_read16(pseg->nr, bus, PCI_SLOT(devfn),
+                                  PCI_FUNC(devfn), pos + PCI_EXP_DEVCAP);
+            if ( cap & PCI_EXP_DEVCAP_PHANTOM )
+            {
+                pdev->phantom_stride = 8 >> MASK_EXTR(cap,
+                                                      PCI_EXP_DEVCAP_PHANTOM);
+                if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
+                    pdev->phantom_stride = 0;
+            }
+            break;
+
         case DEV_TYPE_PCI:
             break;
 
@@ -290,6 +306,27 @@ struct pci_dev *pci_get_pdev(int seg, in
     return NULL;
 }
 
+struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn)
+{
+    struct pci_dev *pdev;
+    int stride;
+
+    if ( seg < 0 || bus < 0 || devfn < 0 )
+        return NULL;
+
+    for ( pdev = pci_get_pdev(seg, bus, devfn), stride = 4;
+          !pdev && stride; stride >>= 1 )
+    {
+        if ( !(devfn & (8 - stride)) )
+            continue;
+        pdev = pci_get_pdev(seg, bus, devfn & ~(8 - stride));
+        if ( pdev && stride != pdev->phantom_stride )
+            pdev = NULL;
+    }
+
+    return pdev;
+}
+
 struct pci_dev *pci_get_pdev_by_domain(
     struct domain *d, int seg, int bus, int devfn)
 {
@@ -488,8 +525,19 @@ int pci_add_device(u16 seg, u8 bus, u8 d
 
 out:
     spin_unlock(&pcidevs_lock);
-    printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
-           seg, bus, slot, func);
+    if ( !ret )
+    {
+        printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
+               seg, bus, slot, func);
+        while ( pdev->phantom_stride )
+        {
+            func += pdev->phantom_stride;
+            if ( PCI_SLOT(func) )
+                break;
+            printk(XENLOG_DEBUG "PCI phantom %04x:%02x:%02x.%u\n",
+                   seg, bus, slot, func);
+        }
+    }
     return ret;
 }
 
@@ -681,7 +729,7 @@ void pci_check_disable_device(u16 seg, u
     u16 cword;
 
     spin_lock(&pcidevs_lock);
-    pdev = pci_get_pdev(seg, bus, devfn);
+    pdev = pci_get_real_pdev(seg, bus, devfn);
     if ( pdev )
     {
         if ( now < pdev->fault.time ||
@@ -698,6 +746,7 @@ void pci_check_disable_device(u16 seg, u
 
     /* Tell the device to stop DMAing; we can't rely on the guest to
      * control it for us. */
+    devfn = pdev->devfn;
     cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                             PCI_COMMAND);
     pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
@@ -759,6 +808,27 @@ struct setup_dom0 {
     int (*handler)(u8 devfn, struct pci_dev *);
 };
 
+static void setup_one_dom0_device(const struct setup_dom0 *ctxt,
+                                  struct pci_dev *pdev)
+{
+    u8 devfn = pdev->devfn;
+
+    do {
+        int err = ctxt->handler(devfn, pdev);
+
+        if ( err )
+        {
+            printk(XENLOG_ERR "setup %04x:%02x:%02x.%u for d%d failed (%d)\n",
+                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                   ctxt->d->domain_id, err);
+            if ( devfn == pdev->devfn )
+                return;
+        }
+        devfn += pdev->phantom_stride;
+    } while ( devfn != pdev->devfn &&
+              PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
+}
+
 static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg)
 {
     struct setup_dom0 *ctxt = arg;
@@ -777,12 +847,12 @@ static int __init _setup_dom0_pci_device
             {
                 pdev->domain = ctxt->d;
                 list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list);
-                ctxt->handler(devfn, pdev);
+                setup_one_dom0_device(ctxt, pdev);
             }
             else if ( pdev->domain == dom_xen )
             {
                 pdev->domain = ctxt->d;
-                ctxt->handler(devfn, pdev);
+                setup_one_dom0_device(ctxt, pdev);
                 pdev->domain = dom_xen;
             }
             else if ( pdev->domain != ctxt->d )
--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -58,6 +58,9 @@ do {                                    
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]) + __must_be_array(x))
 
+#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))
+#define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))
+
 #define reserve_bootmem(_p,_l) ((void)0)
 
 struct domain;
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -63,6 +63,8 @@ struct pci_dev {
     const u8 bus;
     const u8 devfn;
 
+    u8 phantom_stride;
+
     enum pdev_type {
         DEV_TYPE_PCI_UNKNOWN,
         DEV_TYPE_PCIe_ENDPOINT,
@@ -114,6 +116,7 @@ int pci_ro_device(int seg, int bus, int 
 void arch_pci_ro_device(int seg, int bdf);
 int pci_hide_device(int bus, int devfn);
 struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
+struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn);
 struct pci_dev *pci_get_pdev_by_domain(
     struct domain *, int seg, int bus, int devfn);
 void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);

Attachment: IOMMU-phantom-dev.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.