[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v2 2/2] VT-d: Fix vt-d flush timeout issue.



If an IOTLB/Context/IEC flush times out, we should assume that
all devices under this IOMMU cannot function correctly.
So we mark each device under this IOMMU as unassignable
and kill the domain owning the device.

If a Device-TLB flush times out, we mark the target
ATS device as unassignable and kill the domain owning
this device.

If the impacted domain is the hardware domain, just issue
a warning. It is an open question whether we want to kill
the hardware domain (or directly panic the hypervisor).
Comments are welcome.

A device marked as unassignable can no longer be
assigned to any domain.

Signed-off-by: Quan Xu <quan.xu@xxxxxxxxx>
---
 xen/drivers/passthrough/vtd/extern.h  |  4 ++
 xen/drivers/passthrough/vtd/iommu.c   |  6 +++
 xen/drivers/passthrough/vtd/iommu.h   |  5 ++
 xen/drivers/passthrough/vtd/qinval.c  | 86 ++++++++++++++++++++++++++++++++++-
 xen/drivers/passthrough/vtd/x86/ats.c | 16 +++++++
 xen/include/xen/pci.h                 |  7 +++
 6 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/extern.h 
b/xen/drivers/passthrough/vtd/extern.h
index 8acf889..0a7d795 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -62,6 +62,10 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
 int qinval_device_iotlb(struct iommu *iommu,
                         u32 max_invs_pend, u16 sid, u16 size, u64 addr);
 
+void invalidate_timeout(struct iommu *iommu, int type, u16 did,
+                        u16 seg, u8 bus, u8 devfn);
+int invalidate_sync(struct iommu *iommu);
+
 unsigned int get_cache_line_size(void);
 void cacheline_flush(char *);
 void flush_all_cache(void);
diff --git a/xen/drivers/passthrough/vtd/iommu.c 
b/xen/drivers/passthrough/vtd/iommu.c
index dd13865..9317adb 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1890,6 +1890,9 @@ static int intel_iommu_add_device(u8 devfn, struct 
pci_dev *pdev)
     if ( !pdev->domain )
         return -EINVAL;
 
+    if ( IS_PDEV_UNASSIGNABLE(pdev) )
+        return -EACCES;
+
     ret = domain_context_mapping(pdev->domain, devfn, pdev);
     if ( ret )
     {
@@ -2301,6 +2304,9 @@ static int intel_iommu_assign_device(
     if ( list_empty(&acpi_drhd_units) )
         return -ENODEV;
 
+    if ( IS_PDEV_UNASSIGNABLE(pdev) )
+        return -EACCES;
+
     seg = pdev->seg;
     bus = pdev->bus;
     /*
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index ac71ed1..c3beaa6 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -452,6 +452,11 @@ struct qinval_entry {
 
 #define RESERVED_VAL        0
 
+#define INVALID_DID    ((u16)~0)
+#define INVALID_SEG    ((u16)~0)
+#define INVALID_BUS    ((u8)~0)
+#define INVALID_DEVFN  ((u8)~0)
+
 #define TYPE_INVAL_CONTEXT      0x1
 #define TYPE_INVAL_IOTLB        0x2
 #define TYPE_INVAL_DEVICE_IOTLB 0x3
diff --git a/xen/drivers/passthrough/vtd/qinval.c 
b/xen/drivers/passthrough/vtd/qinval.c
index 990baf2..bf7f5b0 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -27,12 +27,62 @@
 #include "dmar.h"
 #include "vtd.h"
 #include "extern.h"
+#include "../ats.h"
 
 static int __read_mostly iommu_qi_timeout_ms = 1;
 integer_param("iommu_qi_timeout_ms", iommu_qi_timeout_ms);
 
 #define IOMMU_QI_TIMEOUT (iommu_qi_timeout_ms * MILLISECS(1))
 
+void invalidate_timeout(struct iommu *iommu, int type, u16 did,
+                        u16 seg, u8 bus, u8 devfn)
+{   /* On flush timeout: mark devices unassignable, kill owning domains. */
+    struct domain *d;
+    unsigned long nr_dom, i;
+    struct pci_dev *pdev;
+
+    switch (type) { /* type is compared against the TYPE_INVAL_* kinds */
+    case TYPE_INVAL_IOTLB:
+    case TYPE_INVAL_CONTEXT:
+    case TYPE_INVAL_IEC: /* this branch ignores did/seg/bus/devfn */
+        nr_dom = cap_ndoms(iommu->cap);
+        i = find_first_bit(iommu->domid_bitmap, nr_dom);
+        while ( i < nr_dom ) { /* visit every set bit of domid_bitmap */
+            d = rcu_lock_domain_by_id(iommu->domid_map[i]);
+            ASSERT(d); /* NOTE(review): the only guard if the lookup fails */
+
+            /* Mark the devices as unassignable. */
+            for_each_pdev(d, pdev)
+                mark_pdev_unassignable(pdev);
+            if ( d != hardware_domain ) /* hardware domain is not killed */
+                domain_kill(d);
+
+            rcu_unlock_domain(d);
+            i = find_next_bit(iommu->domid_bitmap, nr_dom, i + 1);
+        }
+        break;
+
+    case TYPE_INVAL_DEVICE_IOTLB: /* only the seg/bus/devfn match is marked */
+        d = rcu_lock_domain_by_id(iommu->domid_map[did]);
+        ASSERT(d); /* NOTE(review): the only guard if the lookup fails */
+        for_each_pdev(d, pdev)
+            if ( (pdev->seg == seg) &&
+                 (pdev->bus == bus) &&
+                 (pdev->devfn == devfn) )
+                mark_pdev_unassignable(pdev);
+
+        if ( d != hardware_domain ) /* hardware domain is not killed */
+            domain_kill(d);
+        rcu_unlock_domain(d);
+        break;
+
+    default: /* unrecognised type: log a warning and change nothing */
+        dprintk(XENLOG_WARNING VTDPREFIX, "Invalid VT-d flush type.\n");
+        break;
+
+    }
+}
+
 static void print_qi_regs(struct iommu *iommu)
 {
     u64 val;
@@ -187,7 +237,7 @@ static int queue_invalidate_wait(struct iommu *iommu,
     return -EOPNOTSUPP;
 }
 
-static int invalidate_sync(struct iommu *iommu)
+int invalidate_sync(struct iommu *iommu)
 {
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
@@ -262,6 +312,15 @@ static int __iommu_flush_iec(struct iommu *iommu, u8 
granu, u8 im, u16 iidx)
 
     queue_invalidate_iec(iommu, granu, im, iidx);
     ret = invalidate_sync(iommu);
+
+    if ( ret == -ETIMEDOUT )
+    {
+        invalidate_timeout(iommu, TYPE_INVAL_IEC, INVALID_DID,
+                           INVALID_SEG, INVALID_BUS, INVALID_DEVFN);
+        dprintk(XENLOG_WARNING VTDPREFIX,
+                "IEC flush timeout.\n");
+        return ret;
+    }
     /*
      * reading vt-d architecture register will ensure
      * draining happens in implementation independent way.
@@ -308,6 +367,15 @@ static int flush_context_qi(
         queue_invalidate_context(iommu, did, sid, fm,
                                  type >> DMA_CCMD_INVL_GRANU_OFFSET);
         ret = invalidate_sync(iommu);
+        if ( ret == -ETIMEDOUT )
+        {
+            invalidate_timeout(iommu, TYPE_INVAL_CONTEXT,
+                               INVALID_DID,
+                               INVALID_SEG, INVALID_BUS, INVALID_DEVFN);
+            dprintk(XENLOG_WARNING  VTDPREFIX,
+                    "Context flush timeout.\n");
+            return ret;
+        }
     }
     return ret;
 }
@@ -349,9 +417,23 @@ static int flush_iotlb_qi(
         queue_invalidate_iotlb(iommu,
                                type >> DMA_TLB_FLUSH_GRANU_OFFSET, dr,
                                dw, did, size_order, 0, addr);
+
+        /*
+         * Synchronize with hardware for invalidation request descriptors
+         * submitted before Device-TLB invalidate descriptor.
+         */
+        rc = invalidate_sync(iommu);
+        if ( rc == -ETIMEDOUT )
+        {
+            invalidate_timeout(iommu, TYPE_INVAL_IOTLB, INVALID_DID,
+                               INVALID_SEG, INVALID_BUS, INVALID_DEVFN);
+            dprintk(XENLOG_WARNING VTDPREFIX, "IOTLB flush timeout.\n");
+            return rc;
+        }
+
         if ( flush_dev_iotlb )
             ret = dev_invalidate_iotlb(iommu, did, addr, size_order, type);
-        rc = invalidate_sync(iommu);
+
         if ( !ret )
             ret = rc;
     }
diff --git a/xen/drivers/passthrough/vtd/x86/ats.c 
b/xen/drivers/passthrough/vtd/x86/ats.c
index 7c797f6..8745ef4 100644
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -156,6 +156,22 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
 
             rc = qinval_device_iotlb(iommu, pdev->ats_queue_depth,
                                      sid, sbit, addr);
+
+            /*
+             * Synchronize with hardware for Device-TLB invalidate
+             * descriptor.
+             */
+            rc = invalidate_sync(iommu);
+
+            if ( rc == -ETIMEDOUT )
+            {
+                invalidate_timeout(iommu, TYPE_INVAL_DEVICE_IOTLB, did,
+                                   pdev->seg, pdev->bus, pdev->devfn);
+                dprintk(XENLOG_WARNING VTDPREFIX,
+                        "Device-TLB flush timeout.\n");
+                return rc;
+            }
+
             break;
         default:
             dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index a5aef55..0bf6b1a 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -41,6 +41,7 @@
 struct pci_dev_info {
     bool_t is_extfn;
     bool_t is_virtfn;
+    bool_t is_unassignable;
     struct {
         u8 bus;
         u8 devfn;
@@ -88,6 +89,12 @@ struct pci_dev {
 #define for_each_pdev(domain, pdev) \
     list_for_each_entry(pdev, &(domain->arch.pdev_list), domain_list)
 
+#define PDEV_UNASSIGNABLE 1 /* value stored in info.is_unassignable */
+#define mark_pdev_unassignable(pdev) /* set the flag on pdev */ \
+    ((pdev)->info.is_unassignable = PDEV_UNASSIGNABLE)
+
+#define IS_PDEV_UNASSIGNABLE(pdev) ((pdev)->info.is_unassignable)
+
 /*
  * The pcidevs_lock protect alldevs_list, and the assignment for the 
  * devices, it also sync the access to the msi capability that is not
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.