[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] vtd: fix Dom0 S3 when VT-d is enabled.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1237541817 0
# Node ID 115c164721dce0edfec63db3ea0fff492829ab88
# Parent  bbfcea821a0ded028c1a81101b5992cafca32f0b
vtd: fix Dom0 S3 when VT-d is enabled.

On some platforms that support Queued Invalidation and Interrupt
Remapping, Dom0 S3 doesn't work. The patch fixes the issue.

1) In device_power_down(), we should invoke iommu_suspend() after
ioapic_suspend(); in device_power_up(), we should invoke
iommu_resume() before ioapic_resume().

2) Add 2 functions: disable_qinval() and disable_intremap(); in
iommu_suspend(), we invoke them and iommu_disable_translation().
   Rename qinval_setup() to enable_qinval() and rename
   intremap_setup() to enable_intremap().

3) In iommu_resume(), remove the unnecessary
iommu_flush_{context, iotlb}_global() -- actually we mustn't do that
if Queued Invalidation was enabled before S3 because at this point of
S3 resume, Queued Invalidation hasn't been re-enabled.

4) Add a static global array ioapic_pin_to_intremap_index[] to
remember what intremap_index an ioapic pin uses -- during S3 resume,
ioapic_resume() re-writes all the ioapic RTEs, so we can use the array
to re-use the previously-allocated IRTE.

5) Some cleanups:
   a) Change some failure handling in enable_intremap() to panic().
   b) Remove the unnecessary local variable iec_cap in
   __iommu_flush_iec().
   c) Add a dmar_writeq(iommu->reg, DMAR_IQT_REG, 0) in
   enable_qinval().

Signed-off-by: Dexuan Cui <dexuan.cui@xxxxxxxxx>
---
 xen/arch/x86/acpi/power.c              |   24 ++++++------
 xen/drivers/passthrough/vtd/extern.h   |    6 ++-
 xen/drivers/passthrough/vtd/intremap.c |   65 ++++++++++++++++++++-------------
 xen/drivers/passthrough/vtd/iommu.c    |   22 ++++++++---
 xen/drivers/passthrough/vtd/qinval.c   |   29 ++++++++++++--
 5 files changed, 98 insertions(+), 48 deletions(-)

diff -r bbfcea821a0d -r 115c164721dc xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c Fri Mar 20 09:34:09 2009 +0000
+++ b/xen/arch/x86/acpi/power.c Fri Mar 20 09:36:57 2009 +0000
@@ -44,16 +44,16 @@ void do_suspend_lowlevel(void);
 
 static int device_power_down(void)
 {
+    console_suspend();
+
+    time_suspend();
+
+    i8259A_suspend();
+
+    ioapic_suspend();
+
     iommu_suspend();
 
-    console_suspend();
-
-    time_suspend();
-
-    i8259A_suspend();
-    
-    ioapic_suspend();
-    
     lapic_suspend();
 
     return 0;
@@ -62,16 +62,16 @@ static void device_power_up(void)
 static void device_power_up(void)
 {
     lapic_resume();
-    
+
+    iommu_resume();
+
     ioapic_resume();
 
     i8259A_resume();
-    
+
     time_resume();
 
     console_resume();
-
-    iommu_resume();
 }
 
 static void freeze_domains(void)
diff -r bbfcea821a0d -r 115c164721dc xen/drivers/passthrough/vtd/extern.h
--- a/xen/drivers/passthrough/vtd/extern.h      Fri Mar 20 09:34:09 2009 +0000
+++ b/xen/drivers/passthrough/vtd/extern.h      Fri Mar 20 09:36:57 2009 +0000
@@ -30,8 +30,10 @@ void print_vtd_entries(struct iommu *iom
 void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn);
 void dump_iommu_info(unsigned char key);
 
-int qinval_setup(struct iommu *iommu);
-int intremap_setup(struct iommu *iommu);
+int enable_qinval(struct iommu *iommu);
+void disable_qinval(struct iommu *iommu);
+int enable_intremap(struct iommu *iommu);
+void disable_intremap(struct iommu *iommu);
 int queue_invalidate_context(struct iommu *iommu,
     u16 did, u16 source_id, u8 function_mask, u8 granu);
 int queue_invalidate_iotlb(struct iommu *iommu,
diff -r bbfcea821a0d -r 115c164721dc xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c    Fri Mar 20 09:34:09 2009 +0000
+++ b/xen/drivers/passthrough/vtd/intremap.c    Fri Mar 20 09:36:57 2009 +0000
@@ -34,6 +34,15 @@
 #define dest_SMI -1
 #endif
 
+/* The max number of IOAPIC (or IOSAPIC) pins. The typical values can be 24 or
+ * 48 on x86 and Itanium platforms. Here we use a bigger number 256. This
+ * should be big enough. Actually now IREMAP_ENTRY_NR is also 256.
+ */
+#define MAX_IOAPIC_PIN_NUM  256
+
+static int ioapic_pin_to_intremap_index[MAX_IOAPIC_PIN_NUM] =
+    { [0 ... MAX_IOAPIC_PIN_NUM-1] = -1 };
+
 u16 apicid_to_bdf(int apic_id)
 {
     struct acpi_drhd_unit *drhd = ioapic_to_drhd(apic_id);
@@ -94,7 +103,7 @@ static int remap_entry_to_ioapic_rte(
 }
 
 static int ioapic_rte_to_remap_entry(struct iommu *iommu,
-    int apic_id, struct IO_xAPIC_route_entry *old_rte,
+    int apic_id, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte,
     unsigned int rte_upper, unsigned int value)
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
@@ -108,13 +117,14 @@ static int ioapic_rte_to_remap_entry(str
     remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
-    if ( remap_rte->format == 0 )
+    if ( ioapic_pin_to_intremap_index[ioapic_pin] < 0 )
     {
         ir_ctrl->iremap_index++;
         index = ir_ctrl->iremap_index;
+        ioapic_pin_to_intremap_index[ioapic_pin] = index;
     }
     else
-        index = (remap_rte->index_15 << 15) | remap_rte->index_0_14;
+        index = ioapic_pin_to_intremap_index[ioapic_pin];
 
     if ( index > IREMAP_ENTRY_NR - 1 )
     {
@@ -232,6 +242,7 @@ void io_apic_write_remap_rte(
 void io_apic_write_remap_rte(
     unsigned int apic, unsigned int reg, unsigned int value)
 {
+    unsigned int ioapic_pin = (reg - 0x10) / 2;
     struct IO_xAPIC_route_entry old_rte = { 0 };
     struct IO_APIC_route_remap_entry *remap_rte;
     unsigned int rte_upper = (reg & 1) ? 1 : 0;
@@ -289,7 +300,8 @@ void io_apic_write_remap_rte(
     *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
     remap_rte->mask = saved_mask;
 
-    if ( ioapic_rte_to_remap_entry(iommu, IO_APIC_ID(apic),
+    ASSERT(ioapic_pin < MAX_IOAPIC_PIN_NUM);
+    if ( ioapic_rte_to_remap_entry(iommu, IO_APIC_ID(apic), ioapic_pin,
                                    &old_rte, rte_upper, value) )
     {
         *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
@@ -491,13 +503,12 @@ void msi_msg_write_remap_rte(
 }
 #endif
 
-int intremap_setup(struct iommu *iommu)
+int enable_intremap(struct iommu *iommu)
 {
     struct ir_ctrl *ir_ctrl;
     s_time_t start_time;
 
-    if ( !ecap_intr_remap(iommu->ecap) )
-        return -ENODEV;
+    ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
 
     ir_ctrl = iommu_ir_ctrl(iommu);
     if ( ir_ctrl->iremap_maddr == 0 )
@@ -517,7 +528,7 @@ int intremap_setup(struct iommu *iommu)
     ir_ctrl->iremap_maddr |=
             ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIME_SHIFT) : 0;
 #endif
-    /* set size of the interrupt remapping table */ 
+    /* set size of the interrupt remapping table */
     ir_ctrl->iremap_maddr |= IRTA_REG_TABLE_SIZE;
     dmar_writeq(iommu->reg, DMAR_IRTA_REG, ir_ctrl->iremap_maddr);
 
@@ -530,11 +541,7 @@ int intremap_setup(struct iommu *iommu)
     while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_SIRTPS) )
     {
         if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
-        {
-            dprintk(XENLOG_ERR VTDPREFIX,
-                    "Cannot set SIRTP field for interrupt remapping\n");
-            return -ENODEV;
-        }
+            panic("Cannot set SIRTP field for interrupt remapping\n");
         cpu_relax();
     }
 
@@ -546,11 +553,7 @@ int intremap_setup(struct iommu *iommu)
     while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_CFIS) )
     {
         if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
-        {
-            dprintk(XENLOG_ERR VTDPREFIX,
-                    "Cannot set CFI field for interrupt remapping\n");
-            return -ENODEV;
-        }
+            panic("Cannot set CFI field for interrupt remapping\n");
         cpu_relax();
     }
 
@@ -561,12 +564,8 @@ int intremap_setup(struct iommu *iommu)
     start_time = NOW();
     while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_IRES) )
     {
-        if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) ) 
-        {
-            dprintk(XENLOG_ERR VTDPREFIX,
-                    "Cannot set IRE field for interrupt remapping\n");
-            return -ENODEV;
-        }
+        if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
+            panic("Cannot set IRE field for interrupt remapping\n");
         cpu_relax();
     }
 
@@ -575,3 +574,21 @@ int intremap_setup(struct iommu *iommu)
 
     return 0;
 }
+
+void disable_intremap(struct iommu *iommu)
+{
+    s_time_t start_time;
+
+    ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
+
+    iommu->gcmd &= ~(DMA_GCMD_SIRTP | DMA_GCMD_CFI | DMA_GCMD_IRE);
+    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+    start_time = NOW();
+    while ( dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_IRES )
+    {
+        if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
+            panic("Cannot clear IRE field for interrupt remapping\n");
+        cpu_relax();
+    }
+}
diff -r bbfcea821a0d -r 115c164721dc xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Fri Mar 20 09:34:09 2009 +0000
+++ b/xen/drivers/passthrough/vtd/iommu.c       Fri Mar 20 09:36:57 2009 +0000
@@ -639,7 +639,7 @@ static void iommu_enable_translation(str
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
-int iommu_disable_translation(struct iommu *iommu)
+static void iommu_disable_translation(struct iommu *iommu)
 {
     u32 sts;
     unsigned long flags;
@@ -662,7 +662,6 @@ int iommu_disable_translation(struct iom
         cpu_relax();
     }
     spin_unlock_irqrestore(&iommu->register_lock, flags);
-    return 0;
 }
 
 static struct iommu *vector_to_iommu[NR_VECTORS];
@@ -1690,7 +1689,7 @@ static int init_vtd_hw(void)
         for_each_drhd_unit ( drhd )
         {
             iommu = drhd->iommu;
-            if ( qinval_setup(iommu) != 0 )
+            if ( enable_qinval(iommu) != 0 )
             {
                 dprintk(XENLOG_INFO VTDPREFIX,
                         "Failed to enable Queued Invalidation!\n");
@@ -1704,7 +1703,7 @@ static int init_vtd_hw(void)
         for_each_drhd_unit ( drhd )
         {
             iommu = drhd->iommu;
-            if ( intremap_setup(iommu) != 0 )
+            if ( enable_intremap(iommu) != 0 )
             {
                 dprintk(XENLOG_INFO VTDPREFIX,
                         "Failed to enable Interrupt Remapping!\n");
@@ -1934,6 +1933,14 @@ void iommu_suspend(void)
             (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
         iommu_state[i][DMAR_FEUADDR_REG] =
             (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
+
+        iommu_disable_translation(iommu);
+
+        if ( iommu_intremap )
+            disable_intremap(iommu);
+
+        if ( iommu_qinval )
+            disable_qinval(iommu);
     }
 }
 
@@ -1946,7 +1953,11 @@ void iommu_resume(void)
     if ( !vtd_enabled )
         return;
 
-    iommu_flush_all();
+    /* Not sure whether the flush operation is required to meet iommu
+     * specification. Note that BIOS also executes in S3 resume and iommu may
+     * be touched again, so let us do the flush operation for safety.
+     */
+    flush_all_cache();
 
     if ( init_vtd_hw() != 0  && force_iommu )
          panic("IOMMU setup failed, crash Xen for security purpose!\n");
@@ -1964,6 +1975,7 @@ void iommu_resume(void)
                     (u32) iommu_state[i][DMAR_FEADDR_REG]);
         dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
                     (u32) iommu_state[i][DMAR_FEUADDR_REG]);
+
         iommu_enable_translation(iommu);
     }
 }
diff -r bbfcea821a0d -r 115c164721dc xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c      Fri Mar 20 09:34:09 2009 +0000
+++ b/xen/drivers/passthrough/vtd/qinval.c      Fri Mar 20 09:36:57 2009 +0000
@@ -319,7 +319,6 @@ int queue_invalidate_iec(struct iommu *i
 
 int __iommu_flush_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
 {
-    u64 iec_cap;
     int ret;
     ret = queue_invalidate_iec(iommu, granu, im, iidx);
     ret |= invalidate_sync(iommu);
@@ -328,7 +327,7 @@ int __iommu_flush_iec(struct iommu *iomm
      * reading vt-d architecture register will ensure
      * draining happens in implementation independent way.
      */
-    iec_cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
+    (void)dmar_readq(iommu->reg, DMAR_CAP_REG);
     return ret;
 }
 
@@ -413,7 +412,7 @@ static int flush_iotlb_qi(
     return ret;
 }
 
-int qinval_setup(struct iommu *iommu)
+int enable_qinval(struct iommu *iommu)
 {
     s_time_t start_time;
     struct qi_ctrl *qi_ctrl;
@@ -422,8 +421,7 @@ int qinval_setup(struct iommu *iommu)
     qi_ctrl = iommu_qi_ctrl(iommu);
     flush = iommu_get_flush(iommu);
 
-    if ( !ecap_queued_inval(iommu->ecap) )
-        return -ENODEV;
+    ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
 
     if ( qi_ctrl->qinval_maddr == 0 )
     {
@@ -448,6 +446,8 @@ int qinval_setup(struct iommu *iommu)
     qi_ctrl->qinval_maddr |= IQA_REG_QS;
     dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
 
+    dmar_writeq(iommu->reg, DMAR_IQT_REG, 0);
+
     /* enable queued invalidation hardware */
     iommu->gcmd |= DMA_GCMD_QIE;
     dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
@@ -463,3 +463,22 @@ int qinval_setup(struct iommu *iommu)
 
     return 0;
 }
+
+void disable_qinval(struct iommu *iommu)
+{
+    s_time_t start_time;
+
+    ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
+
+    iommu->gcmd &= ~DMA_GCMD_QIE;
+    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+    /* Make sure hardware complete it */
+    start_time = NOW();
+    while ( dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_QIES )
+    {
+        if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
+            panic("Cannot clear QIE field for queue invalidation\n");
+        cpu_relax();
+    }
+}

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.