[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 2/2] VT-d: Fix vt-d flush timeout issue.



If an IOTLB/Context/IEC flush times out, we assume that
all devices behind this IOMMU can no longer function
correctly. So we mark every device behind this IOMMU as
unassignable and kill each domain owning such a device.

If a Device-TLB flush times out, we mark the target
ATS device as unassignable and kill the domain owning
this device. Since, on such a timeout, the hypervisor
cannot tell which of the previously submitted Device-TLB
invalidate descriptors actually failed, all of the
domain's ATS devices are marked as unassignable.

If the impacted domain is Dom0 or the hardware domain,
only a warning is printed. It is an open question whether
we instead want to kill Dom0 or the hardware domain (or
directly panic the hypervisor). Comments are welcome.

A device marked as unassignable can no longer be
assigned to any domain.

Signed-off-by: Quan Xu <quan.xu@xxxxxxxxx>
---
 xen/drivers/passthrough/vtd/iommu.c  |   6 +++
 xen/drivers/passthrough/vtd/iommu.h  |   2 +
 xen/drivers/passthrough/vtd/qinval.c | 100 ++++++++++++++++++++++++++++++++++-
 xen/include/xen/pci.h                |   7 +++
 4 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/xen/drivers/passthrough/vtd/iommu.c 
b/xen/drivers/passthrough/vtd/iommu.c
index dd13865..9317adb 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1890,6 +1890,9 @@ static int intel_iommu_add_device(u8 devfn, struct 
pci_dev *pdev)
     if ( !pdev->domain )
         return -EINVAL;
 
+    if ( IS_PDEV_UNASSIGNABLE(pdev) )
+        return -EACCES;
+
     ret = domain_context_mapping(pdev->domain, devfn, pdev);
     if ( ret )
     {
@@ -2301,6 +2304,9 @@ static int intel_iommu_assign_device(
     if ( list_empty(&acpi_drhd_units) )
         return -ENODEV;
 
+    if ( IS_PDEV_UNASSIGNABLE(pdev) )
+        return -EACCES;
+
     seg = pdev->seg;
     bus = pdev->bus;
     /*
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index ac71ed1..9437c42 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -452,6 +452,8 @@ struct qinval_entry {
 
 #define RESERVED_VAL        0
 
+#define INVALID_DID  ((u16)~0)
+
 #define TYPE_INVAL_CONTEXT      0x1
 #define TYPE_INVAL_IOTLB        0x2
 #define TYPE_INVAL_DEVICE_IOTLB 0x3
diff --git a/xen/drivers/passthrough/vtd/qinval.c 
b/xen/drivers/passthrough/vtd/qinval.c
index 990baf2..786134f 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -27,12 +27,82 @@
 #include "dmar.h"
 #include "vtd.h"
 #include "extern.h"
+#include "../ats.h"
 
 static int __read_mostly iommu_qi_timeout_ms = 1;
 integer_param("iommu_qi_timeout_ms", iommu_qi_timeout_ms);
 
 #define IOMMU_QI_TIMEOUT (iommu_qi_timeout_ms * MILLISECS(1))
 
+static void invalidate_timeout(struct iommu *iommu, int type, u16 did)
+{
+    struct domain *d;
+    unsigned long nr_dom, i;
+    struct pci_dev *pdev;
+
+    switch ( type )
+    {
+    case TYPE_INVAL_IOTLB:
+    case TYPE_INVAL_CONTEXT:
+    case TYPE_INVAL_IEC:
+        /*
+         * The flush timed out at IOMMU scope: all devices behind this
+         * IOMMU may misbehave, so mark each of them unassignable and
+         * kill the domains owning them.
+         */
+        nr_dom = cap_ndoms(iommu->cap);
+        for ( i = find_first_bit(iommu->domid_bitmap, nr_dom);
+              i < nr_dom;
+              i = find_next_bit(iommu->domid_bitmap, nr_dom, i + 1) )
+        {
+            /* The domain may be dying - don't assume the lookup succeeds. */
+            d = rcu_lock_domain_by_id(iommu->domid_map[i]);
+            if ( d == NULL )
+                continue;
+
+            /* Mark the devices as unassignable. */
+            for_each_pdev(d, pdev)
+                mark_pdev_unassignable(pdev);
+
+            if ( d != hardware_domain && d->domain_id != 0 )
+                domain_kill(d);
+            else
+                dprintk(XENLOG_WARNING VTDPREFIX,
+                        "VT-d flush timeout hit Dom%d.\n", d->domain_id);
+
+            rcu_unlock_domain(d);
+        }
+        break;
+
+    case TYPE_INVAL_DEVICE_IOTLB:
+        d = rcu_lock_domain_by_id(iommu->domid_map[did]);
+        if ( d == NULL )
+            break;
+
+        /*
+         * When the invalidation request descriptor times out, the
+         * hypervisor cannot tell which of the previously submitted
+         * Device-TLB invalidate descriptors went wrong. So mark all
+         * of the domain's ATS devices as unassignable.
+         */
+        for_each_pdev(d, pdev)
+            if ( pci_ats_enabled(pdev->seg, pdev->bus, pdev->devfn) )
+                mark_pdev_unassignable(pdev);
+
+        if ( d != hardware_domain && d->domain_id != 0 )
+            domain_kill(d);
+        else
+            dprintk(XENLOG_WARNING VTDPREFIX,
+                    "VT-d flush timeout hit Dom%d.\n", d->domain_id);
+
+        rcu_unlock_domain(d);
+        break;
+
+    default:
+        dprintk(XENLOG_WARNING VTDPREFIX, "Invalid VT-d flush type.\n");
+        break;
+    }
+}
+
 static void print_qi_regs(struct iommu *iommu)
 {
     u64 val;
@@ -262,6 +316,14 @@ static int __iommu_flush_iec(struct iommu *iommu, u8 
granu, u8 im, u16 iidx)
 
     queue_invalidate_iec(iommu, granu, im, iidx);
     ret = invalidate_sync(iommu);
+
+    if ( ret == -ETIMEDOUT )
+    {
+        invalidate_timeout(iommu, TYPE_INVAL_IEC, INVALID_DID);
+        dprintk(XENLOG_WARNING VTDPREFIX,
+                "IEC flush timeout.\n");
+        return ret;
+    }
     /*
      * reading vt-d architecture register will ensure
      * draining happens in implementation independent way.
@@ -308,6 +370,12 @@ static int flush_context_qi(
         queue_invalidate_context(iommu, did, sid, fm,
                                  type >> DMA_CCMD_INVL_GRANU_OFFSET);
         ret = invalidate_sync(iommu);
+        if ( ret == -ETIMEDOUT )
+        {
+            invalidate_timeout(iommu, TYPE_INVAL_CONTEXT, INVALID_DID);
+            dprintk(XENLOG_WARNING VTDPREFIX, "Context flush timeout.\n");
+            return ret;
+        }
     }
     return ret;
 }
@@ -349,9 +419,37 @@ static int flush_iotlb_qi(
         queue_invalidate_iotlb(iommu,
                                type >> DMA_TLB_FLUSH_GRANU_OFFSET, dr,
                               dw, did, size_order, 0, addr);
+
+        /*
+         * Synchronize with hardware for invalidation request descriptors
+         * submitted before Device-TLB invalidate descriptor.
+         */
+        rc = invalidate_sync(iommu);
+        if ( rc == -ETIMEDOUT )
+        {
+            invalidate_timeout(iommu, TYPE_INVAL_IOTLB, INVALID_DID);
+            dprintk(XENLOG_WARNING VTDPREFIX, "IOTLB flush timeout.\n");
+            return rc;
+        }
+
         if ( flush_dev_iotlb )
+        {
+            /*
+             * Synchronize with hardware for Device-TLB invalidate
+             * descriptor.
+             */
             ret = dev_invalidate_iotlb(iommu, did, addr, size_order, type);
-        rc = invalidate_sync(iommu);
+            rc = invalidate_sync(iommu);
+
+            if ( rc == -ETIMEDOUT )
+            {
+                invalidate_timeout(iommu, TYPE_INVAL_DEVICE_IOTLB, did);
+                dprintk(XENLOG_WARNING VTDPREFIX,
+                        "Device-TLB flush timeout.\n");
+                return rc;
+            }
+        }
+
         if ( !ret )
             ret = rc;
     }
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index a5aef55..0bf6b1a 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -41,6 +41,7 @@
 struct pci_dev_info {
     bool_t is_extfn;
     bool_t is_virtfn;
+    bool_t is_unassignable;
     struct {
         u8 bus;
         u8 devfn;
@@ -88,6 +89,12 @@ struct pci_dev {
 #define for_each_pdev(domain, pdev) \
     list_for_each_entry(pdev, &(domain->arch.pdev_list), domain_list)
 
+#define PDEV_UNASSIGNABLE 1
+#define mark_pdev_unassignable(pdev) \
+    ((pdev)->info.is_unassignable = PDEV_UNASSIGNABLE)
+
+#define IS_PDEV_UNASSIGNABLE(pdev) ((pdev)->info.is_unassignable)
+
 /*
  * The pcidevs_lock protect alldevs_list, and the assignment for the 
  * devices, it also sync the access to the msi capability that is not
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.