[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] IOMMU: add phantom function support


  • To: xen-changelog@xxxxxxxxxxxxxxxxxxx
  • From: Xen patchbot-unstable <patchbot@xxxxxxx>
  • Date: Tue, 08 Jan 2013 13:22:10 +0000
  • Delivery-date: Tue, 08 Jan 2013 13:22:33 +0000
  • List-id: "Change log for Mercurial \(receive only\)" <xen-changelog.lists.xen.org>

# HG changeset patch
# User Jan Beulich <jbeulich@xxxxxxxx>
# Date 1357559742 -3600
# Node ID c9a01b396cb4eaedef30e9a6ed615115a9f8bfc5
# Parent  11fa145c880ee814aaf56a7f47f47ee3e5560c7c
IOMMU: add phantom function support

Apart from generating device context entries for the base function,
all phantom functions also need context entries to be generated for
them.

In order to distinguish different use cases, a variant of
pci_get_pdev() is being introduced that, even when passed a phantom
function number, would return the underlying actual device.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Acked-by: "Zhang, Xiantao" <xiantao.zhang@xxxxxxxxx>
---


diff -r 11fa145c880e -r c9a01b396cb4 xen/drivers/passthrough/amd/iommu_cmd.c
--- a/xen/drivers/passthrough/amd/iommu_cmd.c   Mon Jan 07 12:54:39 2013 +0100
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c   Mon Jan 07 12:55:42 2013 +0100
@@ -339,7 +339,15 @@ static void amd_iommu_flush_all_iotlbs(s
         return;
 
     for_each_pdev( d, pdev )
-        amd_iommu_flush_iotlb(pdev->devfn, pdev, gaddr, order);
+    {
+        u8 devfn = pdev->devfn;
+
+        do {
+            amd_iommu_flush_iotlb(devfn, pdev, gaddr, order);
+            devfn += pdev->phantom_stride;
+        } while ( devfn != pdev->devfn &&
+                  PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
+    }
 }
 
 /* Flush iommu cache after p2m changes. */
diff -r 11fa145c880e -r c9a01b396cb4 xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c  Mon Jan 07 12:54:39 2013 +0100
+++ b/xen/drivers/passthrough/amd/iommu_init.c  Mon Jan 07 12:55:42 2013 +0100
@@ -667,7 +667,7 @@ void parse_ppr_log_entry(struct amd_iomm
     devfn = PCI_DEVFN2(device_id);
 
     spin_lock(&pcidevs_lock);
-    pdev = pci_get_pdev(iommu->seg, bus, devfn);
+    pdev = pci_get_real_pdev(iommu->seg, bus, devfn);
     spin_unlock(&pcidevs_lock);
 
     if ( pdev )
diff -r 11fa145c880e -r c9a01b396cb4 xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c   Mon Jan 07 12:54:39 2013 +0100
+++ b/xen/drivers/passthrough/amd/iommu_map.c   Mon Jan 07 12:55:42 2013 +0100
@@ -598,7 +598,6 @@ static int update_paging_mode(struct dom
         for_each_pdev( d, pdev )
         {
             bdf = PCI_BDF2(pdev->bus, pdev->devfn);
-            req_id = get_dma_requestor_id(pdev->seg, bdf);
             iommu = find_iommu_for_device(pdev->seg, bdf);
             if ( !iommu )
             {
@@ -607,16 +606,21 @@ static int update_paging_mode(struct dom
             }
 
             spin_lock_irqsave(&iommu->lock, flags);
-            device_entry = iommu->dev_table.buffer +
-                           (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+            do {
+                req_id = get_dma_requestor_id(pdev->seg, bdf);
+                device_entry = iommu->dev_table.buffer +
+                               (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
 
-            /* valid = 0 only works for dom0 passthrough mode */
-            amd_iommu_set_root_page_table((u32 *)device_entry,
-                                          page_to_maddr(hd->root_table),
-                                          hd->domain_id,
-                                          hd->paging_mode, 1);
+                /* valid = 0 only works for dom0 passthrough mode */
+                amd_iommu_set_root_page_table((u32 *)device_entry,
+                                              page_to_maddr(hd->root_table),
+                                              hd->domain_id,
+                                              hd->paging_mode, 1);
 
-            amd_iommu_flush_device(iommu, req_id);
+                amd_iommu_flush_device(iommu, req_id);
+                bdf += pdev->phantom_stride;
+            } while ( PCI_DEVFN2(bdf) != pdev->devfn &&
+                      PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
             spin_unlock_irqrestore(&iommu->lock, flags);
         }
 
diff -r 11fa145c880e -r c9a01b396cb4 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c   Mon Jan 07 12:54:39 2013 +0100
+++ b/xen/drivers/passthrough/iommu.c   Mon Jan 07 12:55:42 2013 +0100
@@ -158,6 +158,8 @@ void __init iommu_dom0_init(struct domai
 int iommu_add_device(struct pci_dev *pdev)
 {
     struct hvm_iommu *hd;
+    int rc;
+    u8 devfn;
 
     if ( !pdev->domain )
         return -EINVAL;
@@ -168,7 +170,20 @@ int iommu_add_device(struct pci_dev *pde
     if ( !iommu_enabled || !hd->platform_ops )
         return 0;
 
-    return hd->platform_ops->add_device(pdev->devfn, pdev);
+    rc = hd->platform_ops->add_device(pdev->devfn, pdev);
+    if ( rc || !pdev->phantom_stride )
+        return rc;
+
+    for ( devfn = pdev->devfn ; ; )
+    {
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            return 0;
+        rc = hd->platform_ops->add_device(devfn, pdev);
+        if ( rc )
+            printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
+                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+    }
 }
 
 int iommu_enable_device(struct pci_dev *pdev)
@@ -191,6 +206,8 @@ int iommu_enable_device(struct pci_dev *
 int iommu_remove_device(struct pci_dev *pdev)
 {
     struct hvm_iommu *hd;
+    u8 devfn;
+
     if ( !pdev->domain )
         return -EINVAL;
 
@@ -198,6 +215,22 @@ int iommu_remove_device(struct pci_dev *
     if ( !iommu_enabled || !hd->platform_ops )
         return 0;
 
+    for ( devfn = pdev->devfn ; pdev->phantom_stride; )
+    {
+        int rc;
+
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            break;
+        rc = hd->platform_ops->remove_device(devfn, pdev);
+        if ( !rc )
+            continue;
+
+        printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
+               pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
+        return rc;
+    }
+
     return hd->platform_ops->remove_device(pdev->devfn, pdev);
 }
 
@@ -245,6 +278,18 @@ static int assign_device(struct domain *
     if ( (rc = hd->platform_ops->assign_device(d, devfn, pdev)) )
         goto done;
 
+    for ( ; pdev->phantom_stride; rc = 0 )
+    {
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            break;
+        rc = hd->platform_ops->assign_device(d, devfn, pdev);
+        if ( rc )
+            printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n",
+                   d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                   rc);
+    }
+
     if ( has_arch_pdevs(d) && !need_iommu(d) )
     {
         d->need_iommu = 1;
@@ -377,6 +422,21 @@ int deassign_device(struct domain *d, u1
     if ( !pdev )
         return -ENODEV;
 
+    while ( pdev->phantom_stride )
+    {
+        devfn += pdev->phantom_stride;
+        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+            break;
+        ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
+        if ( !ret )
+            continue;
+
+        printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n",
+               d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
+        return ret;
+    }
+
+    devfn = pdev->devfn;
     ret = hd->platform_ops->reassign_device(d, dom0, devfn, pdev);
     if ( ret )
     {
diff -r 11fa145c880e -r c9a01b396cb4 xen/drivers/passthrough/pci.c
--- a/xen/drivers/passthrough/pci.c     Mon Jan 07 12:54:39 2013 +0100
+++ b/xen/drivers/passthrough/pci.c     Mon Jan 07 12:55:42 2013 +0100
@@ -144,6 +144,8 @@ static struct pci_dev *alloc_pdev(struct
     /* update bus2bridge */
     switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
     {
+        int pos;
+        u16 cap;
         u8 sec_bus, sub_bus;
 
         case DEV_TYPE_PCIe_BRIDGE:
@@ -167,6 +169,20 @@ static struct pci_dev *alloc_pdev(struct
             break;
 
         case DEV_TYPE_PCIe_ENDPOINT:
+            pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn),
+                                      PCI_FUNC(devfn), PCI_CAP_ID_EXP);
+            BUG_ON(!pos);
+            cap = pci_conf_read16(pseg->nr, bus, PCI_SLOT(devfn),
+                                  PCI_FUNC(devfn), pos + PCI_EXP_DEVCAP);
+            if ( cap & PCI_EXP_DEVCAP_PHANTOM )
+            {
+                pdev->phantom_stride = 8 >> MASK_EXTR(cap,
+                                                      PCI_EXP_DEVCAP_PHANTOM);
+                if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
+                    pdev->phantom_stride = 0;
+            }
+            break;
+
         case DEV_TYPE_PCI:
             break;
 
@@ -290,6 +306,27 @@ struct pci_dev *pci_get_pdev(int seg, in
     return NULL;
 }
 
+struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn)
+{
+    struct pci_dev *pdev;
+    int stride;
+
+    if ( seg < 0 || bus < 0 || devfn < 0 )
+        return NULL;
+
+    for ( pdev = pci_get_pdev(seg, bus, devfn), stride = 4;
+          !pdev && stride; stride >>= 1 )
+    {
+        if ( !(devfn & (8 - stride)) )
+            continue;
+        pdev = pci_get_pdev(seg, bus, devfn & ~(8 - stride));
+        if ( pdev && stride != pdev->phantom_stride )
+            pdev = NULL;
+    }
+
+    return pdev;
+}
+
 struct pci_dev *pci_get_pdev_by_domain(
     struct domain *d, int seg, int bus, int devfn)
 {
@@ -488,8 +525,19 @@ int pci_add_device(u16 seg, u8 bus, u8 d
 
 out:
     spin_unlock(&pcidevs_lock);
-    printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
-           seg, bus, slot, func);
+    if ( !ret )
+    {
+        printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
+               seg, bus, slot, func);
+        while ( pdev->phantom_stride )
+        {
+            func += pdev->phantom_stride;
+            if ( PCI_SLOT(func) )
+                break;
+            printk(XENLOG_DEBUG "PCI phantom %04x:%02x:%02x.%u\n",
+                   seg, bus, slot, func);
+        }
+    }
     return ret;
 }
 
@@ -681,7 +729,7 @@ void pci_check_disable_device(u16 seg, u
     u16 cword;
 
     spin_lock(&pcidevs_lock);
-    pdev = pci_get_pdev(seg, bus, devfn);
+    pdev = pci_get_real_pdev(seg, bus, devfn);
     if ( pdev )
     {
         if ( now < pdev->fault.time ||
@@ -698,6 +746,7 @@ void pci_check_disable_device(u16 seg, u
 
     /* Tell the device to stop DMAing; we can't rely on the guest to
      * control it for us. */
+    devfn = pdev->devfn;
     cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                             PCI_COMMAND);
     pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
@@ -759,6 +808,27 @@ struct setup_dom0 {
     int (*handler)(u8 devfn, struct pci_dev *);
 };
 
+static void setup_one_dom0_device(const struct setup_dom0 *ctxt,
+                                  struct pci_dev *pdev)
+{
+    u8 devfn = pdev->devfn;
+
+    do {
+        int err = ctxt->handler(devfn, pdev);
+
+        if ( err )
+        {
+            printk(XENLOG_ERR "setup %04x:%02x:%02x.%u for d%d failed (%d)\n",
+                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                   ctxt->d->domain_id, err);
+            if ( devfn == pdev->devfn )
+                return;
+        }
+        devfn += pdev->phantom_stride;
+    } while ( devfn != pdev->devfn &&
+              PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
+}
+
 static int __init _setup_dom0_pci_devices(struct pci_seg *pseg, void *arg)
 {
     struct setup_dom0 *ctxt = arg;
@@ -777,12 +847,12 @@ static int __init _setup_dom0_pci_device
             {
                 pdev->domain = ctxt->d;
                 list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list);
-                ctxt->handler(devfn, pdev);
+                setup_one_dom0_device(ctxt, pdev);
             }
             else if ( pdev->domain == dom_xen )
             {
                 pdev->domain = ctxt->d;
-                ctxt->handler(devfn, pdev);
+                setup_one_dom0_device(ctxt, pdev);
                 pdev->domain = dom_xen;
             }
             else if ( pdev->domain != ctxt->d )
diff -r 11fa145c880e -r c9a01b396cb4 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h     Mon Jan 07 12:54:39 2013 +0100
+++ b/xen/include/xen/lib.h     Mon Jan 07 12:55:42 2013 +0100
@@ -58,6 +58,9 @@ do {                                    
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]) + __must_be_array(x))
 
+#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))
+#define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))
+
 #define reserve_bootmem(_p,_l) ((void)0)
 
 struct domain;
diff -r 11fa145c880e -r c9a01b396cb4 xen/include/xen/pci.h
--- a/xen/include/xen/pci.h     Mon Jan 07 12:54:39 2013 +0100
+++ b/xen/include/xen/pci.h     Mon Jan 07 12:55:42 2013 +0100
@@ -63,6 +63,8 @@ struct pci_dev {
     const u8 bus;
     const u8 devfn;
 
+    u8 phantom_stride;
+
     enum pdev_type {
         DEV_TYPE_PCI_UNKNOWN,
         DEV_TYPE_PCIe_ENDPOINT,
@@ -114,6 +116,7 @@ int pci_ro_device(int seg, int bus, int 
 void arch_pci_ro_device(int seg, int bdf);
 int pci_hide_device(int bus, int devfn);
 struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
+struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn);
 struct pci_dev *pci_get_pdev_by_domain(
     struct domain *, int seg, int bus, int devfn);
 void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.