
[Xen-changelog] [xen-3.2-testing] Handle DRHDs with different supported AGAWs.



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1217344395 -3600
# Node ID f830b47149a4ccd7c0b0be0eaa41add653c90c36
# Parent  6de4320d71f9da9228a130e2b02bd855c1c53cf3
Handle DRHDs with different supported AGAWs.

This changeset is back-ported from xen-unstable.
Signed-off-by: Dexuan Cui <dexuan.cui@xxxxxxxxx>

The original description of the changeset is:

vtd: Various cleanups and fixes:
* Handle DRHDs with different supported AGAWs. To support this we
create page tables which always have 4 levels, and skip top levels
for units which support only 2 or 3 levels.
* Handle systems with mixed DRHD support for cache snooping. We must
pessimistically CLFLUSH if any DRHD does not support snooping.

Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
xen-unstable changeset: 17755:ecd266cebcab648132d432899eabaecf8a168508
xen-unstable date: Fri May 30 15:06:08 2008 +0100
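The first point above works roughly as follows: the domain's I/O page table is always built 4 levels deep, and when a context entry is programmed for a unit whose hardware only walks 2 or 3 levels, the unused top levels are skipped by following the first slot of each level the hardware will not walk itself, until the table at the unit's own top level is reached. A minimal C sketch of that descent, assuming hypothetical helpers map_table()/unmap_table() and a simplified PTE layout (illustrative names, not the Xen symbols):

/*
 * Sketch of skipping unused top levels: walk down from the fixed
 * 4-level root until we reach the table at the level this DRHD
 * actually supports, then hand that table's address to the hardware.
 * map_table()/unmap_table() are hypothetical helpers, and the PTE
 * layout is simplified to "address in the upper bits, flags below".
 */
#include <stdint.h>

#define LEVEL_TO_AGAW(l)   ((l) - 2)            /* mirrors level_to_agaw() */
#define PTE_ADDR_MASK      (~0xfffULL)          /* assumed flag/address split */

typedef uint64_t dma_pte_t;

extern dma_pte_t *map_table(uint64_t maddr);    /* hypothetical */
extern void unmap_table(dma_pte_t *tbl);        /* hypothetical */

/* Return the table address to program into the context entry, 0 if unset. */
static uint64_t skip_top_levels(dma_pte_t *root, uint64_t root_maddr,
                                int nr_pt_levels)
{
    uint64_t maddr = root_maddr;
    int agaw;

    for ( agaw = LEVEL_TO_AGAW(4); agaw != LEVEL_TO_AGAW(nr_pt_levels); agaw-- )
    {
        /* At the top we can read the root directly; below it we must map. */
        dma_pte_t *tbl = (agaw == LEVEL_TO_AGAW(4)) ? root : map_table(maddr);

        maddr = tbl[0] & PTE_ADDR_MASK;         /* follow the first slot */
        if ( tbl != root )
            unmap_table(tbl);
        if ( maddr == 0 )
            return 0;                            /* lower table not allocated */
    }

    return maddr;
}

In the patch itself this walk happens in domain_context_mapping_one(), and the context entry's address width is set to the agaw at which the walk stopped rather than the domain's own agaw.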
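The second point is the pessimistic flush policy: while enumerating DRHDs in iommu_alloc(), any unit reporting a non-coherent page-walk (ecap_coherent() == 0) sets a global flag, and from then on every page-table and context-entry update is pushed out of the CPU caches with CLFLUSH before the hardware can read stale data; fully coherent systems take the early return and pay nothing. A hedged sketch of the idea, assuming a fixed 64-byte cache line instead of the probed x86_clflush_size:

/*
 * Sketch of the pessimistic flush: if any IOMMU lacks coherent
 * page-walk, page-table memory must be CLFLUSHed after every update.
 * Illustrative only; 64 is an assumed cache-line size.
 */
static int any_iommu_incoherent;                /* set while enumerating DRHDs */
static const unsigned int flush_line = 64;      /* assumption, not probed */

static inline void clflush_one(volatile void *p)
{
    asm volatile ( "clflush %0" : "+m" (*(volatile char *)p) );
}

static void flush_iommu_table_range(void *addr, int size)
{
    int i;

    if ( !any_iommu_incoherent )
        return;                 /* every unit snoops CPU caches: nothing to do */

    for ( i = 0; i < size; i += flush_line )
        clflush_one((char *)addr + i);
}

The real code below sizes the loop with the CPUID-reported CLFLUSH line size and exposes iommu_flush_cache_entry() and iommu_flush_cache_page() as the two entry points callers use.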
---
 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    |  175 +++++++++++++++---------------
 xen/include/asm-x86/hvm/vmx/intel-iommu.h |    1 
 xen/include/asm-x86/iommu.h               |    1 
 3 files changed, 95 insertions(+), 82 deletions(-)

diff -r 6de4320d71f9 -r f830b47149a4 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c
--- a/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    Fri Jul 25 15:04:26 2008 +0100
+++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c    Tue Jul 29 16:13:15 2008 +0100
@@ -79,24 +79,28 @@ static void iommu_domid_release(struct d
     }
 }
 
-unsigned int x86_clflush_size;
-void clflush_cache_range(void *adr, int size)
+static unsigned int x86_clflush_size;
+static int iommus_incoherent;
+static void __iommu_flush_cache(void *addr, int size)
 {
     int i;
+
+    if ( !iommus_incoherent )
+        return;
+
     for ( i = 0; i < size; i += x86_clflush_size )
-        clflush(adr + i);
-}
-
-static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
-{
-    if ( !ecap_coherent(iommu->ecap) )
-        clflush_cache_range(addr, size);
-}
-
-#define iommu_flush_cache_entry(iommu, addr) \
-       __iommu_flush_cache(iommu, addr, 8)
-#define iommu_flush_cache_page(iommu, addr) \
-       __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K)
+        clflush((char*)addr + i);
+}
+
+void iommu_flush_cache_entry(void *addr)
+{
+    __iommu_flush_cache(addr, 8);
+}
+
+void iommu_flush_cache_page(void *addr)
+{
+    __iommu_flush_cache(addr, PAGE_SIZE_4K);
+}
 
 int nr_iommus;
 /* context entry handling */
@@ -119,11 +123,11 @@ static struct context_entry * device_to_
             return NULL;
         }
         memset((void *) phy_addr, 0, PAGE_SIZE);
-        iommu_flush_cache_page(iommu, (void *)phy_addr);
+        iommu_flush_cache_page((void *)phy_addr);
         phy_addr = virt_to_maddr((void *)phy_addr);
         set_root_value(*root, phy_addr);
         set_root_present(*root);
-        iommu_flush_cache_entry(iommu, root);
+        iommu_flush_cache_entry(root);
     }
     phy_addr = (unsigned long) get_context_addr(*root);
     context = (struct context_entry *)maddr_to_virt(phy_addr);
@@ -157,8 +161,6 @@ static struct page_info *addr_to_dma_pag
 static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
     int addr_width = agaw_to_width(hd->agaw);
     struct dma_pte *parent, *pte = NULL, *pgd;
     int level = agaw_to_level(hd->agaw);
@@ -166,9 +168,6 @@ static struct page_info *addr_to_dma_pag
     unsigned long flags;
     struct page_info *pg = NULL;
     u64 *vaddr = NULL;
-
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
 
     addr &= (((u64)1) << addr_width) - 1;
     spin_lock_irqsave(&hd->mapping_lock, flags);
@@ -200,7 +199,7 @@ static struct page_info *addr_to_dma_pag
                 return NULL;
             }
             memset(vaddr, 0, PAGE_SIZE);
-            iommu_flush_cache_page(iommu, vaddr);
+            iommu_flush_cache_page(vaddr);
 
             dma_set_pte_addr(*pte, page_to_maddr(pg));
 
@@ -210,7 +209,7 @@ static struct page_info *addr_to_dma_pag
              */
             dma_set_pte_readable(*pte);
             dma_set_pte_writable(*pte);
-            iommu_flush_cache_entry(iommu, pte);
+            iommu_flush_cache_entry(pte);
         }
         else
         {
@@ -549,8 +548,6 @@ static void dma_pte_clear_one(struct dom
     struct dma_pte *pte = NULL;
     struct page_info *pg = NULL;
 
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-
     /* get last level pte */
     pg = dma_addr_level_page(domain, addr, 1);
     if ( !pg )
@@ -560,7 +557,7 @@ static void dma_pte_clear_one(struct dom
     if ( pte )
     {
         dma_clear_pte(*pte);
-        iommu_flush_cache_entry(drhd->iommu, pte);
+        iommu_flush_cache_entry(pte);
 
         for_each_drhd_unit ( drhd )
         {
@@ -602,18 +599,13 @@ static void dma_pte_clear_range(struct d
 /* free page table pages. last level pte should already be cleared */
 void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
 {
-    struct acpi_drhd_unit *drhd;
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct iommu *iommu;
     int addr_width = agaw_to_width(hd->agaw);
     struct dma_pte *pte;
     int total = agaw_to_level(hd->agaw);
     int level;
     u64 tmp;
     struct page_info *pg = NULL;
-
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
 
     start &= (((u64)1) << addr_width) - 1;
     end &= (((u64)1) << addr_width) - 1;
@@ -637,7 +629,7 @@ void dma_pte_free_pagetable(struct domai
             pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
             pte += address_level_offset(tmp, level);
             dma_clear_pte(*pte);
-            iommu_flush_cache_entry(iommu, pte);
+            iommu_flush_cache_entry(pte);
             unmap_domain_page(pte);
             free_domheap_page(pg);
 
@@ -677,7 +669,7 @@ static int iommu_set_root_entry(struct i
             return -ENOMEM;
 
         memset((u8*)root, 0, PAGE_SIZE);
-        iommu_flush_cache_page(iommu, root);
+        iommu_flush_cache_page(root);
 
         if ( cmpxchg((unsigned long *)&iommu->root_entry,
                      0, (unsigned long)root) != 0 )
@@ -963,6 +955,8 @@ struct iommu *iommu_alloc(void *hw_data)
 {
     struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
     struct iommu *iommu;
+    unsigned long sagaw;
+    int agaw;
 
     if ( nr_iommus > MAX_IOMMUS )
     {
@@ -991,6 +985,23 @@ struct iommu *iommu_alloc(void *hw_data)
 
     iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
     iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
+
+    /* Calculate number of pagetable levels: between 2 and 4. */
+    sagaw = cap_sagaw(iommu->cap);
+    for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
+        if ( test_bit(agaw, &sagaw) )
+            break;
+    if ( agaw < 0 )
+    {
+        gdprintk(XENLOG_ERR VTDPREFIX,
+                 "IOMMU: unsupported sagaw %lx\n", sagaw);
+        xfree(iommu);
+        return NULL;
+    }
+    iommu->nr_pt_levels = agaw_to_level(agaw);
+
+    if ( !ecap_coherent(iommu->ecap) )
+        iommus_incoherent = 1;
 
     spin_lock_init(&iommu->lock);
     spin_lock_init(&iommu->register_lock);
@@ -1025,9 +1036,6 @@ int iommu_domain_init(struct domain *dom
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct iommu *iommu = NULL;
-    int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
-    int adjust_width, agaw;
-    unsigned long sagaw;
     struct acpi_drhd_unit *drhd;
 
     spin_lock_init(&hd->mapping_lock);
@@ -1041,22 +1049,7 @@ int iommu_domain_init(struct domain *dom
     for_each_drhd_unit ( drhd )
         iommu = drhd->iommu ? : iommu_alloc(drhd);
 
-    /* calculate AGAW */
-    if (guest_width > cap_mgaw(iommu->cap))
-        guest_width = cap_mgaw(iommu->cap);
-    adjust_width = guestwidth_to_adjustwidth(guest_width);
-    agaw = width_to_agaw(adjust_width);
-    /* FIXME: hardware doesn't support it, choose a bigger one? */
-    sagaw = cap_sagaw(iommu->cap);
-    if ( !test_bit(agaw, &sagaw) )
-    {
-        gdprintk(XENLOG_ERR VTDPREFIX,
-                 "IOMMU: hardware doesn't support the agaw\n");
-        agaw = find_next_bit(&sagaw, 5, agaw);
-        if ( agaw >= 5 )
-            return -ENODEV;
-    }
-    hd->agaw = agaw;
+    hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
     return 0;
 }
 
@@ -1069,6 +1062,8 @@ static int domain_context_mapping_one(
     struct context_entry *context;
     unsigned long flags;
     int ret = 0;
+    u64 pgd_maddr;
+    int agaw = -1;
 
     context = device_to_context_entry(iommu, bus, devfn);
     if ( !context )
@@ -1089,36 +1084,54 @@ static int domain_context_mapping_one(
     }
 
     spin_lock_irqsave(&iommu->lock, flags);
+
+    if ( ecap_pass_thru(iommu->ecap) )
+        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
+    else
+    {
+        /* Ensure we have pagetables allocated down to leaf PTE. */
+        if ( !hd->pgd )
+        {
+            addr_to_dma_page(domain, 0);
+            if ( !hd->pgd )
+            {
+            nomem:
+                spin_unlock_irqrestore(&hd->mapping_lock, flags);
+                return -ENOMEM;
+            }
+        }
+ 
+        /* Skip top levels of page tables for 2- and 3-level DRHDs. */
+        pgd_maddr = virt_to_maddr(hd->pgd);
+        for ( agaw = level_to_agaw(4);
+              agaw != level_to_agaw(iommu->nr_pt_levels);
+              agaw-- )
+        {
+            if ( agaw == level_to_agaw(4) )
+                pgd_maddr = dma_pte_addr(*hd->pgd);
+            else
+            {
+                struct dma_pte *p = map_domain_page(pgd_maddr);
+                pgd_maddr = dma_pte_addr(*p);
+                unmap_domain_page(p);
+                if ( pgd_maddr == 0 )
+                    goto nomem;
+            } 
+        }
+        context_set_address_root(*context, pgd_maddr);
+        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
+    }
+
     /*
      * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
      * be 1 based as required by intel's iommu hw.
      */
+    BUG_ON(agaw == -1);
     context_set_domain_id(context, domain);
-    context_set_address_width(*context, hd->agaw);
-
-    if ( ecap_pass_thru(iommu->ecap) )
-        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
-    else
-    {
-        if ( !hd->pgd )
-        {
-            struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
-            if ( !pgd )
-            {
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return -ENOMEM;
-            }
-            memset(pgd, 0, PAGE_SIZE);
-            hd->pgd = pgd;
-        }
- 
-        context_set_address_root(*context, virt_to_maddr(hd->pgd));
-        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
-    }
-
+    context_set_address_width(*context, agaw);
     context_set_fault_enable(*context);
     context_set_present(*context);
-    iommu_flush_cache_entry(iommu, context);
+    iommu_flush_cache_entry(context);
 
     gdprintk(XENLOG_INFO VTDPREFIX,
              "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
@@ -1315,7 +1328,7 @@ static int domain_context_unmap_one(
     spin_lock_irqsave(&iommu->lock, flags);
     context_clear_present(*context);
     context_clear_entry(*context);
-    iommu_flush_cache_entry(iommu, context);
+    iommu_flush_cache_entry(context);
     iommu_flush_context_global(iommu, 0);
     iommu_flush_iotlb_global(iommu, 0);
     spin_unlock_irqrestore(&iommu->lock, flags);
@@ -1558,7 +1571,7 @@ int iommu_map_page(struct domain *d, pad
     pte += gfn & LEVEL_MASK;
     dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
     dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
-    iommu_flush_cache_entry(iommu, pte);
+    iommu_flush_cache_entry(pte);
     unmap_domain_page(pte);
 
     for_each_drhd_unit ( drhd )
@@ -1606,8 +1619,6 @@ int iommu_page_mapping(struct domain *do
     int index;
     struct page_info *pg = NULL;
 
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    iommu = drhd->iommu;
     if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
         return -EINVAL;
     iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
@@ -1624,7 +1635,7 @@ int iommu_page_mapping(struct domain *do
         pte += start_pfn & LEVEL_MASK;
         dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
         dma_set_pte_prot(*pte, prot);
-        iommu_flush_cache_entry(iommu, pte);
+        iommu_flush_cache_entry(pte);
         unmap_domain_page(pte);
         start_pfn++;
         index++;
@@ -1675,7 +1686,7 @@ void iommu_flush(struct domain *d, dma_a
             iommu_flush_write_buffer(iommu);
     }
 
-    iommu_flush_cache_entry(iommu, pte);
+    iommu_flush_cache_entry(pte);
 }
 
 static int iommu_prepare_rmrr_dev(
diff -r 6de4320d71f9 -r f830b47149a4 xen/include/asm-x86/hvm/vmx/intel-iommu.h
--- a/xen/include/asm-x86/hvm/vmx/intel-iommu.h Fri Jul 25 15:04:26 2008 +0100
+++ b/xen/include/asm-x86/hvm/vmx/intel-iommu.h Tue Jul 29 16:13:15 2008 +0100
@@ -232,6 +232,7 @@ struct context_entry {
 /* page table handling */
 #define LEVEL_STRIDE       (9)
 #define LEVEL_MASK         ((1 << LEVEL_STRIDE) - 1)
+#define level_to_agaw(val) ((val) - 2)
 #define agaw_to_level(val) ((val) + 2)
 #define agaw_to_width(val) (30 + val * LEVEL_STRIDE)
 #define width_to_agaw(w)   ((w - 30)/LEVEL_STRIDE)
diff -r 6de4320d71f9 -r f830b47149a4 xen/include/asm-x86/iommu.h
--- a/xen/include/asm-x86/iommu.h       Fri Jul 25 15:04:26 2008 +0100
+++ b/xen/include/asm-x86/iommu.h       Tue Jul 29 16:13:15 2008 +0100
@@ -56,6 +56,7 @@ struct iommu {
     void __iomem *reg; /* Pointer to hardware regs, virtual addr */
     u32        index;         /* Sequence number of iommu */
     u32        gcmd;          /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+    u32        nr_pt_levels;
     u64        cap;
     u64        ecap;
     spinlock_t lock; /* protect context, domain ids */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog