
[Xen-changelog] [xen-unstable] [IOMMU] clean interrupt remapping and queued invalidation



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1255946075 -3600
# Node ID 7e02a2cd1618240f64b83a89669f0fcfdc6cac2f
# Parent  dcc5d5d954e942efd9a143ff3d5769e82178ecad
[IOMMU] clean interrupt remapping and queued invalidation

This patch enlarges the interrupt remapping table to fix out-of-range
table accesses seen when many multiple-function PCI devices are in use.
The invalidation queue is also expanded.
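
In concrete terms, with IREMAP_PAGE_ORDER set to 8 the table grows from
a single 4K page (256 entries) to 256 pages, i.e. 64K entries of 16
bytes each, and IRTA_REG_TABLE_SIZE becomes IREMAP_PAGE_ORDER + 7 = 15,
since hardware interprets that field as 2^(size+1) entries. The
invalidation queue grows from one page to 2^QINVAL_PAGE_ORDER = 4
pages, i.e. 1024 descriptors.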

Signed-Off-By: Zhai Edwin <edwin.zhai@xxxxxxxxx>
Signed-Off-By: Cui Dexuan <dexuan.cui@xxxxxxxxx>
---
 xen/drivers/passthrough/vtd/intremap.c |   34 ++++++++++++------
 xen/drivers/passthrough/vtd/iommu.c    |   22 +++++++----
 xen/drivers/passthrough/vtd/iommu.h    |   41 ++++++++++++++++-----
 xen/drivers/passthrough/vtd/qinval.c   |   62 ++++++++++++++++-----------------
 xen/drivers/passthrough/vtd/utils.c    |   21 ++++++++---
 5 files changed, 115 insertions(+), 65 deletions(-)
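
All of the lookups changed below follow one pattern: shift the
descriptor index right by the per-page entry order to find the page
holding the entry, then take the remainder to find the slot within that
page. A minimal stand-alone sketch of that arithmetic (hypothetical
base address and index; a 4K CPU page is assumed, so the constants are
simplified relative to the Xen headers):

    /* Illustration only -- not part of the patch. */
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT          12                /* 4K CPU page assumed */
    #define IREMAP_ENTRY_ORDER  (PAGE_SHIFT - 4)  /* 16-byte IRTEs: 256 per page */

    int main(void)
    {
        uint64_t iremap_maddr = 0x100000;         /* hypothetical table base */
        int index = 1000;                         /* hypothetical IRTE index */

        /* Page that holds the entry, and the entry's slot within it. */
        uint64_t entry_base = iremap_maddr +
            ((uint64_t)(index >> IREMAP_ENTRY_ORDER) << PAGE_SHIFT);
        int slot = index % (1 << IREMAP_ENTRY_ORDER);

        printf("index %d -> page base %#llx, slot %d\n",
               index, (unsigned long long)entry_base, slot);
        return 0;
    }

With index = 1000 this yields the table's fourth page (base + 0x3000)
and slot 232, which is exactly what the map_vtd_domain_page(entry_base)
and index % (1 << IREMAP_ENTRY_ORDER) pairs below compute.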

diff -r dcc5d5d954e9 -r 7e02a2cd1618 xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c    Mon Oct 19 10:50:46 2009 +0100
+++ b/xen/drivers/passthrough/vtd/intremap.c    Mon Oct 19 10:54:35 2009 +0100
@@ -146,6 +146,7 @@ static int remap_entry_to_ioapic_rte(
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    u64 entry_base;
 
     if ( ir_ctrl == NULL )
     {
@@ -164,9 +165,11 @@ static int remap_entry_to_ioapic_rte(
 
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
+    entry_base = ir_ctrl->iremap_maddr +
+                 (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
     iremap_entries =
-        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
-    iremap_entry = &iremap_entries[index];
+        (struct iremap_entry *)map_vtd_domain_page(entry_base);
+    iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
 
     old_rte->vector = iremap_entry->lo.vector;
     old_rte->delivery_mode = iremap_entry->lo.dlm;
@@ -192,6 +195,7 @@ static int ioapic_rte_to_remap_entry(str
     int index;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    u64 entry_base;
 
     remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
@@ -208,15 +212,17 @@ static int ioapic_rte_to_remap_entry(str
     {
         dprintk(XENLOG_ERR VTDPREFIX,
                 "%s: intremap index (%d) is larger than"
-                " the maximum index (%ld)!\n",
+                " the maximum index (%d)!\n",
                 __func__, index, IREMAP_ENTRY_NR - 1);
         spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
         return -EFAULT;
     }
 
+    entry_base = ir_ctrl->iremap_maddr +
+                 (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
     iremap_entries =
-        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
-    iremap_entry = &iremap_entries[index];
+        (struct iremap_entry *)map_vtd_domain_page(entry_base);
+    iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
 
     memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
 
@@ -425,6 +431,7 @@ static int remap_entry_to_msi_msg(
     int index;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    u64 entry_base;
 
     if ( ir_ctrl == NULL )
     {
@@ -447,9 +454,11 @@ static int remap_entry_to_msi_msg(
 
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
+    entry_base = ir_ctrl->iremap_maddr +
+                 (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
     iremap_entries =
-        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
-    iremap_entry = &iremap_entries[index];
+        (struct iremap_entry *)map_vtd_domain_page(entry_base);
+    iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
 
     msg->address_hi = MSI_ADDR_BASE_HI;
     msg->address_lo =
@@ -485,6 +494,7 @@ static int msi_msg_to_remap_entry(
     int index;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    u64 entry_base;
 
     remap_rte = (struct msi_msg_remap_entry *) msg;
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
@@ -502,16 +512,18 @@ static int msi_msg_to_remap_entry(
     {
         dprintk(XENLOG_ERR VTDPREFIX,
                 "%s: intremap index (%d) is larger than"
-                " the maximum index (%ld)!\n",
+                " the maximum index (%d)!\n",
                 __func__, index, IREMAP_ENTRY_NR - 1);
         msi_desc->remap_index = -1;
         spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
         return -EFAULT;
     }
 
+    entry_base = ir_ctrl->iremap_maddr +
+                 (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
     iremap_entries =
-        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
-    iremap_entry = &iremap_entries[index];
+        (struct iremap_entry *)map_vtd_domain_page(entry_base);
+    iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
     memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
 
     /* Set interrupt remapping table entry */
@@ -619,7 +631,7 @@ int enable_intremap(struct iommu *iommu)
     if ( ir_ctrl->iremap_maddr == 0 )
     {
         drhd = iommu_to_drhd(iommu);
-        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, 1);
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR);
         if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
diff -r dcc5d5d954e9 -r 7e02a2cd1618 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Mon Oct 19 10:50:46 2009 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c       Mon Oct 19 10:54:35 2009 +0100
@@ -135,16 +135,16 @@ void iommu_flush_cache_entry(void *addr)
 
 void iommu_flush_cache_page(void *addr, unsigned long npages)
 {
-    __iommu_flush_cache(addr, PAGE_SIZE_4K * npages);
+    __iommu_flush_cache(addr, PAGE_SIZE * npages);
 }
 
 /* Allocate page table, return its machine address */
 u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
 {
     struct acpi_rhsa_unit *rhsa;
-    struct page_info *pg;
+    struct page_info *pg, *cur_pg;
     u64 *vaddr;
-    int node = -1;
+    int node = -1, i;
 
     rhsa = drhd_to_rhsa(drhd);
     if ( rhsa )
@@ -154,11 +154,17 @@ u64 alloc_pgtable_maddr(struct acpi_drhd
                              (node == -1 ) ? 0 : MEMF_node(node));
     if ( !pg )
         return 0;
-    vaddr = __map_domain_page(pg);
-    memset(vaddr, 0, PAGE_SIZE * npages);
-
-    iommu_flush_cache_page(vaddr, npages);
-    unmap_domain_page(vaddr);
+
+    cur_pg = pg;
+    for ( i = 0; i < npages; i++ )
+    {
+        vaddr = __map_domain_page(cur_pg);
+        memset(vaddr, 0, PAGE_SIZE);
+
+        iommu_flush_cache_page(vaddr, 1);
+        unmap_domain_page(vaddr);
+        cur_pg++;
+    }
 
     return page_to_maddr(pg);
 }
diff -r dcc5d5d954e9 -r 7e02a2cd1618 xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h       Mon Oct 19 10:50:46 2009 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.h       Mon Oct 19 10:54:35 2009 +0100
@@ -302,7 +302,23 @@ struct iremap_entry {
     }hi;
   };
 };
-#define IREMAP_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct iremap_entry))
+
+/* Max intr remapping table page order is 8, as max number of IRTEs is 64K */
+#define IREMAP_PAGE_ORDER  8
+
+/*
+ * VTd engine handles 4K page, while CPU may have different page size on
+ * different arch. E.g. 16K on IPF.
+ */
+#define IREMAP_ARCH_PAGE_ORDER  (IREMAP_PAGE_ORDER + PAGE_SHIFT_4K - PAGE_SHIFT)
+#define IREMAP_ARCH_PAGE_NR     ( IREMAP_ARCH_PAGE_ORDER < 0 ?  \
+                                1 :                             \
+                                1 << IREMAP_ARCH_PAGE_ORDER )
+
+/* Each entry is 16 bytes, so 2^8 entries per 4K page */
+#define IREMAP_ENTRY_ORDER  ( PAGE_SHIFT - 4 )
+#define IREMAP_ENTRY_NR     ( 1 << ( IREMAP_PAGE_ORDER + 8 ) )
+
 #define iremap_present(v) ((v).lo & 1)
 #define iremap_fault_disable(v) (((v).lo >> 1) & 1)
 
@@ -392,12 +408,17 @@ struct qinval_entry {
     }q;
 };
 
-/* Order of queue invalidation pages */
-#define IQA_REG_QS       0
-#define NUM_QINVAL_PAGES (1 << IQA_REG_QS)
-
-/* Each entry is 16 byte */
-#define QINVAL_ENTRY_NR  (1 << (IQA_REG_QS + 8))
+/* Order of queue invalidation pages (max is 8) */
+#define QINVAL_PAGE_ORDER   2
+
+#define QINVAL_ARCH_PAGE_ORDER  (QINVAL_PAGE_ORDER + PAGE_SHIFT_4K - PAGE_SHIFT)
+#define QINVAL_ARCH_PAGE_NR     ( QINVAL_ARCH_PAGE_ORDER < 0 ?  \
+                                1 :                             \
+                                1 << QINVAL_ARCH_PAGE_ORDER )
+
+/* Each entry is 16 bytes, so 2^8 entries per page */
+#define QINVAL_ENTRY_ORDER  ( PAGE_SHIFT - 4 )
+#define QINVAL_ENTRY_NR     (1 << (QINVAL_PAGE_ORDER + 8))
 
 /* Status data flag */
 #define QINVAL_STAT_INIT  0
@@ -429,9 +450,9 @@ struct qinval_entry {
 #define IEC_GLOBAL_INVL         0
 #define IEC_INDEX_INVL          1
 #define IRTA_REG_EIME_SHIFT     11
-#define IRTA_REG_TABLE_SIZE     7    // 4k page = 256 * 16 byte entries
-                                     // 2^^(IRTA_REG_TABLE_SIZE + 1) = 256
-                                     // IRTA_REG_TABLE_SIZE = 7
+
+/* 2^(IRTA_REG_TABLE_SIZE + 1) = IREMAP_ENTRY_NR */
+#define IRTA_REG_TABLE_SIZE     ( IREMAP_PAGE_ORDER + 7 )
 
 #define VTD_PAGE_TABLE_LEVEL_3  3
 #define VTD_PAGE_TABLE_LEVEL_4  4
diff -r dcc5d5d954e9 -r 7e02a2cd1618 xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c      Mon Oct 19 10:50:46 2009 +0100
+++ b/xen/drivers/passthrough/vtd/qinval.c      Mon Oct 19 10:54:35 2009 +0100
@@ -45,17 +45,15 @@ static void print_qi_regs(struct iommu *
 
 static int qinval_next_index(struct iommu *iommu)
 {
-    u64 tail, head;
+    u64 tail;
 
     tail = dmar_readq(iommu->reg, DMAR_IQT_REG);
     tail >>= QINVAL_INDEX_SHIFT;
 
-    head = dmar_readq(iommu->reg, DMAR_IQH_REG);
-    head >>= QINVAL_INDEX_SHIFT;
-
-    /* round wrap check */
-    if ( ( tail + 1 ) % QINVAL_ENTRY_NR == head  )
-        return -1;
+    /* (tail+1 == head) indicates a full queue, wait for HW */
+    while ( ( tail + 1 ) % QINVAL_ENTRY_NR ==
+            ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
+        cpu_relax();
 
     return tail;
 }
@@ -77,11 +75,13 @@ static int gen_cc_inv_dsc(struct iommu *
     unsigned long flags;
     struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+    u64 entry_base = qi_ctrl->qinval_maddr +
+                 (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
 
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
     qinval_entries =
-        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
-    qinval_entry = &qinval_entries[index];
+        (struct qinval_entry *)map_vtd_domain_page(entry_base);
+    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
     qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
     qinval_entry->q.cc_inv_dsc.lo.granu = granu;
     qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
@@ -121,14 +121,14 @@ static int gen_iotlb_inv_dsc(struct iomm
     unsigned long flags;
     struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
-
-    if ( index == -1 )
-        return -1;
+    u64 entry_base = qi_ctrl->qinval_maddr +
+                 (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
+
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
 
     qinval_entries =
-        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
-    qinval_entry = &qinval_entries[index];
+        (struct qinval_entry *)map_vtd_domain_page(entry_base);
+    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
     qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
     qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
     qinval_entry->q.iotlb_inv_dsc.lo.dr = dr;
@@ -172,13 +172,13 @@ static int gen_wait_dsc(struct iommu *io
     unsigned long flags;
     struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
-
-    if ( index == -1 )
-        return -1;
+    u64 entry_base = qi_ctrl->qinval_maddr +
+                 (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
+
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
     qinval_entries =
-        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
-    qinval_entry = &qinval_entries[index];
+        (struct qinval_entry *)map_vtd_domain_page(entry_base);
+    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
     qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
     qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
     qinval_entry->q.inv_wait_dsc.lo.sw = sw;
@@ -247,14 +247,14 @@ static int gen_dev_iotlb_inv_dsc(struct 
     unsigned long flags;
     struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
-
-    if ( index == -1 )
-        return -1;
+    u64 entry_base = qi_ctrl->qinval_maddr +
+                 (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
+
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
 
     qinval_entries =
-        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
-    qinval_entry = &qinval_entries[index];
+        (struct qinval_entry *)map_vtd_domain_page(entry_base);
+    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
     qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
     qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
     qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
@@ -295,14 +295,14 @@ static int gen_iec_inv_dsc(struct iommu 
     unsigned long flags;
     struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
-
-    if ( index == -1 )
-        return -1;
+    u64 entry_base = qi_ctrl->qinval_maddr +
+                 (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
+
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
 
     qinval_entries =
-        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
-    qinval_entry = &qinval_entries[index];
+        (struct qinval_entry *)map_vtd_domain_page(entry_base);
+    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
     qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
     qinval_entry->q.iec_inv_dsc.lo.granu = granu;
     qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
@@ -445,7 +445,7 @@ int enable_qinval(struct iommu *iommu)
     if ( qi_ctrl->qinval_maddr == 0 )
     {
         drhd = iommu_to_drhd(iommu);
-        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, NUM_QINVAL_PAGES);
+        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, QINVAL_ARCH_PAGE_NR);
         if ( qi_ctrl->qinval_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
@@ -464,7 +464,7 @@ int enable_qinval(struct iommu *iommu)
      * registers are automatically reset to 0 with write
      * to IQA register.
      */
-    qi_ctrl->qinval_maddr |= IQA_REG_QS;
+    qi_ctrl->qinval_maddr |= QINVAL_PAGE_ORDER;
 
     spin_lock_irqsave(&iommu->register_lock, flags);
     dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
diff -r dcc5d5d954e9 -r 7e02a2cd1618 xen/drivers/passthrough/vtd/utils.c
--- a/xen/drivers/passthrough/vtd/utils.c       Mon Oct 19 10:50:46 2009 +0100
+++ b/xen/drivers/passthrough/vtd/utils.c       Mon Oct 19 10:54:35 2009 +0100
@@ -226,8 +226,7 @@ static void dump_iommu_info(unsigned cha
             /* Dump interrupt remapping table. */
             u64 iremap_maddr = dmar_readq(iommu->reg, DMAR_IRTA_REG);
             int nr_entry = 1 << ((iremap_maddr & 0xF) + 1);
-            struct iremap_entry *iremap_entries =
-                (struct iremap_entry *)map_vtd_domain_page(iremap_maddr);
+            struct iremap_entry *iremap_entries = NULL;
 
             printk("  Interrupt remapping table (nr_entry=0x%x. "
                 "Only dump P=1 entries here):\n", nr_entry);
@@ -235,7 +234,18 @@ static void dump_iommu_info(unsigned cha
                    "FPD P\n");
             for ( i = 0; i < nr_entry; i++ )
             {
-                struct iremap_entry *p = iremap_entries + i;
+                struct iremap_entry *p;
+                if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
+                {
+                    /* This entry crosses a page boundary */
+                    u64 entry_base = iremap_maddr +
+                        (( i >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
+                    if ( iremap_entries )
+                        unmap_vtd_domain_page(iremap_entries);
+                    iremap_entries =
+                        (struct iremap_entry *)map_vtd_domain_page(entry_base);
+                }
+                p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
 
                 if ( !p->lo.p )
                     continue;
@@ -246,8 +256,9 @@ static void dump_iommu_info(unsigned cha
                     (u32)p->lo.dlm, (u32)p->lo.tm, (u32)p->lo.rh,
                     (u32)p->lo.dm, (u32)p->lo.fpd, (u32)p->lo.p);
             }
-
-            unmap_vtd_domain_page(iremap_entries);
+            if ( iremap_entries )
+                unmap_vtd_domain_page(iremap_entries);
+
         }
     }
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog