[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH, v2] IOMMU: don't immediately disable bus mastering on faults



Instead, give the owning domain at least a small opportunity of fixing
things up, and allow for rare faults to not bring down the device at
all. The amount of faults tolerated within a given time period (all
numbers are made up with no specific rationale) is higher for Dom0 than
for DomU-s.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -566,7 +566,7 @@ static hw_irq_controller iommu_maskable_
 
 static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
 {
-    u16 domain_id, device_id, bdf, cword, flags;
+    u16 domain_id, device_id, bdf, flags;
     u32 code;
     u64 *addr;
     int count = 0;
@@ -620,18 +620,10 @@ static void parse_event_log_entry(struct
                "fault address = %#"PRIx64", flags = %#x\n",
                event_str[code-1], domain_id, device_id, *addr, flags);
 
-        /* Tell the device to stop DMAing; we can't rely on the guest to
-         * control it for us. */
         for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
             if ( get_dma_requestor_id(iommu->seg, bdf) == device_id )
-            {
-                cword = pci_conf_read16(iommu->seg, PCI_BUS(bdf),
-                                        PCI_SLOT(bdf), PCI_FUNC(bdf),
-                                        PCI_COMMAND);
-                pci_conf_write16(iommu->seg, PCI_BUS(bdf), PCI_SLOT(bdf),
-                                 PCI_FUNC(bdf), PCI_COMMAND, 
-                                 cword & ~PCI_COMMAND_MASTER);
-            }
+                pci_check_disable_device(iommu->seg, PCI_BUS(bdf),
+                                         PCI_DEVFN2(bdf));
     }
     else
     {
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -215,6 +215,7 @@ static int device_assigned(u16 seg, u8 b
 static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct pci_dev *pdev;
     int rc = 0;
 
     if ( !iommu_enabled || !hd->platform_ops )
@@ -228,6 +229,13 @@ static int assign_device(struct domain *
         return -EXDEV;
 
     spin_lock(&pcidevs_lock);
+    pdev = pci_get_pdev(seg, bus, devfn);
+    if ( pdev )
+    {
+        pdev->fault.threshold = PT_FAULT_THRESHOLD_UNPRIV;
+        pdev->fault.count     = 0;
+    }
+
     if ( (rc = hd->platform_ops->assign_device(d, seg, bus, devfn)) )
         goto done;
 
@@ -239,6 +247,8 @@ static int assign_device(struct domain *
         goto done;
     }
 done:
+    if ( pdev && pdev->domain != d )
+        pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
     spin_unlock(&pcidevs_lock);
     return rc;
 }
@@ -379,6 +389,9 @@ int deassign_device(struct domain *d, u1
         return ret;
     }
 
+    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
+    pdev->fault.count     = 0;
+
     if ( !has_arch_pdevs(d) && need_iommu(d) )
     {
         d->need_iommu = 0;
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -137,6 +137,7 @@ static struct pci_dev *alloc_pdev(struct
     *((u8*) &pdev->bus) = bus;
     *((u8*) &pdev->devfn) = devfn;
     pdev->domain = NULL;
+    pdev->fault.threshold = PT_FAULT_THRESHOLD_PRIV;
     INIT_LIST_HEAD(&pdev->msi_list);
     list_add(&pdev->alldevs_list, &pseg->alldevs_list);
     spin_lock_init(&pdev->msix_table_lock);
@@ -672,6 +673,36 @@ int __init pci_device_detect(u16 seg, u8
     return 1;
 }
 
+void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
+{
+    struct pci_dev *pdev;
+    s_time_t now = NOW();
+    u16 cword;
+
+    spin_lock(&pcidevs_lock);
+    pdev = pci_get_pdev(seg, bus, devfn);
+    if ( pdev )
+    {
+        if ( now < pdev->fault.time ||
+             now - pdev->fault.time > MILLISECS(10) )
+            pdev->fault.count >>= 1;
+        pdev->fault.time = now;
+        if ( ++pdev->fault.count < pdev->fault.threshold )
+            pdev = NULL;
+    }
+    spin_unlock(&pcidevs_lock);
+
+    if ( !pdev )
+        return;
+
+    /* Tell the device to stop DMAing; we can't rely on the guest to
+     * control it for us. */
+    cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                            PCI_COMMAND);
+    pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                     PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
+}
+
 /*
  * scan pci devices to add all existed PCI devices to alldevs_list,
  * and setup pci hierarchy in array bus2bridge.
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -917,7 +917,7 @@ static void __do_iommu_page_fault(struct
     while (1)
     {
         u8 fault_reason;
-        u16 source_id, cword;
+        u16 source_id;
         u32 data;
         u64 guest_addr;
         int type;
@@ -950,14 +950,8 @@ static void __do_iommu_page_fault(struct
         iommu_page_fault_do_one(iommu, type, fault_reason,
                                 source_id, guest_addr);
 
-        /* Tell the device to stop DMAing; we can't rely on the guest to
-         * control it for us. */
-        cword = pci_conf_read16(iommu->intel->drhd->segment,
-                                PCI_BUS(source_id), PCI_SLOT(source_id),
-                                PCI_FUNC(source_id), PCI_COMMAND);
-        pci_conf_write16(iommu->intel->drhd->segment, PCI_BUS(source_id),
-                         PCI_SLOT(source_id), PCI_FUNC(source_id),
-                         PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
+        pci_check_disable_device(iommu->intel->drhd->segment,
+                                 PCI_BUS(source_id), PCI_DEVFN2(source_id));
 
         fault_index++;
         if ( fault_index > cap_num_fault_regs(iommu->cap) )
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -64,6 +64,13 @@ struct pci_dev {
     const u8 devfn;
     struct pci_dev_info info;
     struct arch_pci_dev arch;
+    struct {
+        s_time_t time;
+        unsigned int threshold;
+#define PT_FAULT_THRESHOLD_PRIV  10
+#define PT_FAULT_THRESHOLD_UNPRIV 3
+        unsigned int count;
+    } fault;
     u64 vf_rlen[6];
 };
 
@@ -107,6 +114,7 @@ int pci_hide_device(int bus, int devfn);
 struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
 struct pci_dev *pci_get_pdev_by_domain(
     struct domain *, int seg, int bus, int devfn);
+void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
 
 uint8_t pci_conf_read8(
     unsigned int seg, unsigned int bus, unsigned int dev, unsigned int func,


Attachment: IOMMU-defer-BM-disable.patch
Description: Text document

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.