[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v13 3/3] IOMMU: fix vt-d Device-TLB flush timeout issue



From: Quan Xu <quan.xu@xxxxxxxxx>

If Device-TLB flush timed out, we hide the target ATS device
immediately. By hiding the device, we make sure it can't be
assigned to any domain any longer (see device_assigned).

Signed-off-by: Quan Xu <quan.xu@xxxxxxxxx>

CC: Jan Beulich <jbeulich@xxxxxxxx>
CC: Kevin Tian <kevin.tian@xxxxxxxxx>
CC: Feng Wu <feng.wu@xxxxxxxxx>

---
v13:
   1. drop domain crash logic, which is added to the vendor
      independent layer in patch #2.
   2. rename dev_invalidate_iotlb_timeout() to iommu_dev_iotlb_flush_timeout()
      and move it to the vendor independent layer.
---
 xen/drivers/passthrough/iommu.c       | 21 +++++++++++++
 xen/drivers/passthrough/pci.c         |  6 ++--
 xen/drivers/passthrough/vtd/extern.h  |  5 ++--
 xen/drivers/passthrough/vtd/qinval.c  | 56 +++++++++++++++++++++++++++--------
 xen/drivers/passthrough/vtd/x86/ats.c | 11 ++-----
 xen/include/xen/iommu.h               |  3 ++
 xen/include/xen/pci.h                 |  1 +
 7 files changed, 76 insertions(+), 27 deletions(-)

diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index d793f5d..5db8ae6 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -361,6 +361,27 @@ int iommu_iotlb_flush_all(struct domain *d)
     return rc;
 }
 
+void iommu_dev_iotlb_flush_timeout(struct domain *d,
+                                   struct pci_dev *pdev)
+{
+    pcidevs_lock();
+
+    ASSERT(pdev->domain);
+    if ( d != pdev->domain )
+        return;
+
+    list_del(&pdev->domain_list);
+    pdev->domain = NULL;
+    pci_hide_existing_device(pdev);
+    if ( !d->is_shutting_down && printk_ratelimit() )
+        printk(XENLOG_ERR
+               "dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
+               d->domain_id, pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+               PCI_FUNC(pdev->devfn));
+
+    pcidevs_unlock();
+}
+
 int __init iommu_setup(void)
 {
     int rc = -ENODEV;
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index bb5f344..58bfb79 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -419,7 +419,7 @@ static void free_pdev(struct pci_seg *pseg, struct pci_dev 
*pdev)
     xfree(pdev);
 }
 
-static void _pci_hide_device(struct pci_dev *pdev)
+void pci_hide_existing_device(struct pci_dev *pdev)
 {
     if ( pdev->domain )
         return;
@@ -436,7 +436,7 @@ int __init pci_hide_device(int bus, int devfn)
     pdev = alloc_pdev(get_pseg(0), bus, devfn);
     if ( pdev )
     {
-        _pci_hide_device(pdev);
+        pci_hide_existing_device(pdev);
         rc = 0;
     }
     pcidevs_unlock();
@@ -466,7 +466,7 @@ int __init pci_ro_device(int seg, int bus, int devfn)
     }
 
     __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map);
-    _pci_hide_device(pdev);
+    pci_hide_existing_device(pdev);
 
     return 0;
 }
diff --git a/xen/drivers/passthrough/vtd/extern.h 
b/xen/drivers/passthrough/vtd/extern.h
index 45357f2..efaff28 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -25,6 +25,7 @@
 
 #define VTDPREFIX "[VT-D]"
 
+struct pci_ats_dev;
 extern bool_t rwbf_quirk;
 
 void print_iommu_regs(struct acpi_drhd_unit *drhd);
@@ -60,8 +61,8 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
                          u64 addr, unsigned int size_order, u64 type);
 
 int __must_check qinval_device_iotlb_sync(struct iommu *iommu,
-                                          u32 max_invs_pend,
-                                          u16 sid, u16 size, u64 addr);
+                                          struct pci_ats_dev *ats_dev,
+                                          u16 did, u16 size, u64 addr);
 
 unsigned int get_cache_line_size(void);
 void cacheline_flush(char *);
diff --git a/xen/drivers/passthrough/vtd/qinval.c 
b/xen/drivers/passthrough/vtd/qinval.c
index 4492b29..7a5c433 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -27,11 +27,11 @@
 #include "dmar.h"
 #include "vtd.h"
 #include "extern.h"
+#include "../ats.h"
 
 #define VTD_QI_TIMEOUT 1
 
-static int __must_check invalidate_sync(struct iommu *iommu,
-                                        bool_t flush_dev_iotlb);
+static int __must_check invalidate_sync(struct iommu *iommu);
 
 static void print_qi_regs(struct iommu *iommu)
 {
@@ -103,7 +103,7 @@ static int __must_check 
queue_invalidate_context_sync(struct iommu *iommu,
 
     unmap_vtd_domain_page(qinval_entries);
 
-    return invalidate_sync(iommu, 0);
+    return invalidate_sync(iommu);
 }
 
 static int __must_check queue_invalidate_iotlb_sync(struct iommu *iommu,
@@ -140,7 +140,7 @@ static int __must_check queue_invalidate_iotlb_sync(struct 
iommu *iommu,
     qinval_update_qtail(iommu, index);
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 
-    return invalidate_sync(iommu, 0);
+    return invalidate_sync(iommu);
 }
 
 static int __must_check queue_invalidate_wait(struct iommu *iommu,
@@ -199,25 +199,55 @@ static int __must_check queue_invalidate_wait(struct 
iommu *iommu,
     return -EOPNOTSUPP;
 }
 
-static int __must_check invalidate_sync(struct iommu *iommu,
-                                        bool_t flush_dev_iotlb)
+static int __must_check invalidate_sync(struct iommu *iommu)
 {
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
     ASSERT(qi_ctrl->qinval_maddr);
 
-    return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb);
+    return queue_invalidate_wait(iommu, 0, 1, 1, 0);
+}
+
+static int __must_check dev_invalidate_sync(struct iommu *iommu,
+                                            struct pci_dev *pdev, u16 did)
+{
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+    int rc;
+
+    ASSERT(qi_ctrl->qinval_maddr);
+    rc = queue_invalidate_wait(iommu, 0, 1, 1, 1);
+    if ( rc == -ETIMEDOUT )
+    {
+        struct domain *d = NULL;
+
+        if ( test_bit(did, iommu->domid_bitmap) )
+            d = rcu_lock_domain_by_id(iommu->domid_map[did]);
+
+        /*
+         * In case the domain has been freed or the IOMMU domid bitmap is
+         * not valid, the device no longer belongs to this domain.
+         */
+        if ( d == NULL )
+            return rc;
+
+        iommu_dev_iotlb_flush_timeout(d, pdev);
+        rcu_unlock_domain(d);
+    }
+
+    return rc;
 }
 
 int qinval_device_iotlb_sync(struct iommu *iommu,
-                             u32 max_invs_pend,
-                             u16 sid, u16 size, u64 addr)
+                             struct pci_ats_dev *ats_dev,
+                             u16 did, u16 size, u64 addr)
 {
     unsigned long flags;
     unsigned int index;
     u64 entry_base;
     struct qinval_entry *qinval_entry, *qinval_entries;
+    struct pci_dev *pdev = ats_dev->pdev;
 
+    ASSERT(pdev);
     spin_lock_irqsave(&iommu->register_lock, flags);
     index = qinval_next_index(iommu);
     entry_base = iommu_qi_ctrl(iommu)->qinval_maddr +
@@ -227,9 +257,9 @@ int qinval_device_iotlb_sync(struct iommu *iommu,
 
     qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
     qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
-    qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = 
ats_dev->ats_queue_depth;
     qinval_entry->q.dev_iotlb_inv_dsc.lo.res_2 = 0;
-    qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = sid;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = PCI_BDF2(pdev->bus, 
pdev->devfn);
     qinval_entry->q.dev_iotlb_inv_dsc.lo.res_3 = 0;
 
     qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
@@ -240,7 +270,7 @@ int qinval_device_iotlb_sync(struct iommu *iommu,
     qinval_update_qtail(iommu, index);
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 
-    return invalidate_sync(iommu, 1);
+    return dev_invalidate_sync(iommu, pdev, did);
 }
 
 static int __must_check queue_invalidate_iec_sync(struct iommu *iommu,
@@ -271,7 +301,7 @@ static int __must_check queue_invalidate_iec_sync(struct 
iommu *iommu,
     qinval_update_qtail(iommu, index);
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 
-    ret = invalidate_sync(iommu, 0);
+    ret = invalidate_sync(iommu);
 
     /*
      * reading vt-d architecture register will ensure
diff --git a/xen/drivers/passthrough/vtd/x86/ats.c 
b/xen/drivers/passthrough/vtd/x86/ats.c
index 11fe9bb..23e46b1 100644
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -118,14 +118,9 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
 
     list_for_each_entry( ats_dev, &ats_devices, list )
     {
-        struct pci_dev *pdev = ats_dev->pdev;
-        u16 sid;
         bool_t sbit;
         int rc = 0;
 
-        ASSERT(pdev);
-        sid = PCI_BDF2(pdev->bus, pdev->devfn);
-
         /* Only invalidate devices that belong to this IOMMU */
         if ( ats_dev->iommu != iommu )
             continue;
@@ -140,8 +135,7 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
             /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
             sbit = 1;
             addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
-            rc = qinval_device_iotlb_sync(iommu, ats_dev->ats_queue_depth,
-                                          sid, sbit, addr);
+            rc = qinval_device_iotlb_sync(iommu, ats_dev, did, sbit, addr);
             break;
         case DMA_TLB_PSI_FLUSH:
             if ( !device_in_domain(iommu, ats_dev, did) )
@@ -160,8 +154,7 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
                 addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
             }
 
-            rc = qinval_device_iotlb_sync(iommu, ats_dev->ats_queue_depth,
-                                          sid, sbit, addr);
+            rc = qinval_device_iotlb_sync(iommu, ats_dev, did, sbit, addr);
             break;
         default:
             dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index a759f2b..e0f7d52 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -207,6 +207,9 @@ int __must_check iommu_iotlb_flush(struct domain *d, 
unsigned long gfn,
                                    unsigned int page_count);
 int __must_check iommu_iotlb_flush_all(struct domain *d);
 
+void iommu_dev_iotlb_flush_timeout(struct domain *d,
+                                   struct pci_dev *pdev);
+
 /*
  * The purpose of the iommu_dont_flush_iotlb optional cpu flag is to
  * avoid unecessary iotlb_flush in the low level IOMMU code.
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 6ed29dd..e4940cd 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -118,6 +118,7 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
 int pci_remove_device(u16 seg, u8 bus, u8 devfn);
 int pci_ro_device(int seg, int bus, int devfn);
 int pci_hide_device(int bus, int devfn);
+void pci_hide_existing_device(struct pci_dev *pdev);
 struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
 struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn);
 struct pci_dev *pci_get_pdev_by_domain(
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.