
[Xen-changelog] [xen-unstable] VT-d: Allocates page table pgd, root_entry, iremap and qinval from



# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1207815758 -3600
# Node ID 1d3aaa6a8b870805e16dcf162223fb2edd9de26d
# Parent  85848be18ba22814bddeb82a4cfc99e14447cab1
VT-d: Allocates page table pgd, root_entry, iremap and qinval from
domheap rather than xenheap, and gets rid of struct page_info in
iommu.c.

Signed-off-by: Weidong Han <weidong.han@xxxxxxxxx>
---
 xen/drivers/passthrough/vtd/intremap.c |   51 ++-
 xen/drivers/passthrough/vtd/iommu.c    |  458 +++++++++------------------------
 xen/drivers/passthrough/vtd/iommu.h    |    4 
 xen/drivers/passthrough/vtd/qinval.c   |   94 +++---
 xen/drivers/passthrough/vtd/utils.c    |   40 +-
 xen/drivers/passthrough/vtd/x86/vtd.c  |  177 ++++++++++++
 xen/include/xen/hvm/iommu.h            |    2 
 xen/include/xen/iommu.h                |    3 
 8 files changed, 421 insertions(+), 408 deletions(-)
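
The change below converts every VT-d table (pgd, root/context entries, the
interrupt-remapping page and the queued-invalidation page) from a xenheap
virtual pointer to a domheap page tracked only by its machine address and
mapped on demand with map_vtd_domain_page()/unmap_vtd_domain_page().  The
bodies of the new helpers are not part of the hunks quoted here, so the
fragment below is only a rough sketch, built from interfaces that do appear
in this diff, of how such an allocator could look; example_alloc_pgtable_maddr()
is an illustrative name, not the changeset's actual alloc_pgtable_maddr(),
and the real helper presumably also flushes the IOMMU cache for the freshly
zeroed page.

#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/domain_page.h>

/* Sketch only: not the changeset's real alloc_pgtable_maddr(). */
static u64 example_alloc_pgtable_maddr(void)
{
    struct page_info *pg;
    void *vaddr;

    /* Take an anonymous domheap page instead of a xenheap page. */
    pg = alloc_domheap_page(NULL, 0);
    if ( pg == NULL )
        return 0;

    /* Map it just long enough to zero it, then drop the mapping. */
    vaddr = map_domain_page(page_to_mfn(pg));
    memset(vaddr, 0, PAGE_SIZE);
    unmap_domain_page(vaddr);

    /* Callers keep only the machine address of the table page. */
    return page_to_maddr(pg);
}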

diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/drivers/passthrough/vtd/intremap.c
--- a/xen/drivers/passthrough/vtd/intremap.c    Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/drivers/passthrough/vtd/intremap.c    Thu Apr 10 09:22:38 2008 +0100
@@ -45,7 +45,7 @@ static void remap_entry_to_ioapic_rte(
 static void remap_entry_to_ioapic_rte(
     struct iommu *iommu, struct IO_APIC_route_entry *old_rte)
 {
-    struct iremap_entry *iremap_entry = NULL;
+    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct IO_APIC_route_remap_entry *remap_rte;
     unsigned int index;
     unsigned long flags;
@@ -70,7 +70,9 @@ static void remap_entry_to_ioapic_rte(
 
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
-    iremap_entry = &ir_ctrl->iremap[index];
+    iremap_entries =
+        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
+    iremap_entry = &iremap_entries[index];
 
     old_rte->vector = iremap_entry->lo.vector;
     old_rte->delivery_mode = iremap_entry->lo.dlm;
@@ -80,13 +82,14 @@ static void remap_entry_to_ioapic_rte(
     old_rte->dest.logical.__reserved_1 = 0;
     old_rte->dest.logical.logical_dest = iremap_entry->lo.dst;
 
+    unmap_vtd_domain_page(iremap_entries);
     spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
 }
 
 static void ioapic_rte_to_remap_entry(struct iommu *iommu,
     int apic_id, struct IO_APIC_route_entry *old_rte)
 {
-    struct iremap_entry *iremap_entry = NULL;
+    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     struct IO_APIC_route_remap_entry *remap_rte;
     unsigned int index;
     unsigned long flags;
@@ -103,7 +106,10 @@ static void ioapic_rte_to_remap_entry(st
         goto out;
     }
 
-    iremap_entry = &(ir_ctrl->iremap[index]);
+    iremap_entries =
+        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
+    iremap_entry = &iremap_entries[index];
+
     if ( *(u64 *)iremap_entry != 0 )
         dprintk(XENLOG_WARNING VTDPREFIX,
                "Interrupt remapping entry is in use already!\n");
@@ -124,12 +130,13 @@ static void ioapic_rte_to_remap_entry(st
     iremap_entry->lo.p = 1;    /* finally, set present bit */
     ir_ctrl->iremap_index++;
 
+    unmap_vtd_domain_page(iremap_entries);
     iommu_flush_iec_index(iommu, 0, index);
     ret = invalidate_sync(iommu);
 
-    /* now construct new ioapic rte entry */ 
+    /* now construct new ioapic rte entry */
     remap_rte->vector = old_rte->vector;
-    remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */ 
+    remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
     remap_rte->index_15 = index & 0x8000;
     remap_rte->index_0_14 = index & 0x7fff;
     remap_rte->delivery_status = old_rte->delivery_status;
@@ -154,7 +161,7 @@ io_apic_read_remap_rte(
     struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
-    if ( !iommu || !ir_ctrl || !(ir_ctrl->iremap) )
+    if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
     {
         *IO_APIC_BASE(apic) = reg;
         return *(IO_APIC_BASE(apic)+4);
@@ -200,7 +207,7 @@ io_apic_write_remap_rte(
     struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
-    if ( !iommu || !ir_ctrl || !(ir_ctrl->iremap) )
+    if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
     {
         *IO_APIC_BASE(apic) = reg;
         *(IO_APIC_BASE(apic)+4) = value;
@@ -238,32 +245,30 @@ int intremap_setup(struct iommu *iommu)
 {
     struct ir_ctrl *ir_ctrl;
     unsigned long start_time;
-    u64 paddr;
 
     if ( !ecap_intr_remap(iommu->ecap) )
         return -ENODEV;
 
     ir_ctrl = iommu_ir_ctrl(iommu);
-    if ( ir_ctrl->iremap == NULL )
-    {
-        ir_ctrl->iremap = alloc_xenheap_page();
-        if ( ir_ctrl->iremap == NULL )
+    if ( ir_ctrl->iremap_maddr == 0 )
+    {
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr();
+        if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
-                    "Cannot allocate memory for ir_ctrl->iremap\n");
-            return -ENODEV;
-        }
-        memset(ir_ctrl->iremap, 0, PAGE_SIZE);
-    }
-
-    paddr = virt_to_maddr(ir_ctrl->iremap);
+                    "Cannot allocate memory for ir_ctrl->iremap_maddr\n");
+            return -ENODEV;
+        }
+    }
+
 #if defined(ENABLED_EXTENDED_INTERRUPT_SUPPORT)
     /* set extended interrupt mode bit */
-    paddr |= ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIMI_SHIFT) : 0;
+    ir_ctrl->iremap_maddr |=
+            ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIMI_SHIFT) : 0;
 #endif
     /* size field = 256 entries per 4K page = 8 - 1 */
-    paddr |= 7;
-    dmar_writeq(iommu->reg, DMAR_IRTA_REG, paddr);
+    ir_ctrl->iremap_maddr |= 7;
+    dmar_writeq(iommu->reg, DMAR_IRTA_REG, ir_ctrl->iremap_maddr);
 
     /* set SIRTP */
     iommu->gcmd |= DMA_GCMD_SIRTP;
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c       Thu Apr 10 09:22:38 2008 +0100
@@ -185,71 +185,70 @@ void iommu_flush_cache_page(struct iommu
 
 int nr_iommus;
 /* context entry handling */
-static struct context_entry * device_to_context_entry(struct iommu *iommu,
-                                                      u8 bus, u8 devfn)
-{
-    struct root_entry *root;
-    struct context_entry *context;
-    unsigned long phy_addr;
+static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
+{
+    struct root_entry *root, *root_entries;
     unsigned long flags;
+    u64 maddr;
 
     spin_lock_irqsave(&iommu->lock, flags);
-    root = &iommu->root_entry[bus];
+    root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
+    root = &root_entries[bus];
     if ( !root_present(*root) )
     {
-        phy_addr = (unsigned long) alloc_xenheap_page();
-        if ( !phy_addr )
+        maddr = alloc_pgtable_maddr();
+        if ( maddr == 0 )
         {
             spin_unlock_irqrestore(&iommu->lock, flags);
-            return NULL;
+            return 0;
         }
-        memset((void *) phy_addr, 0, PAGE_SIZE);
-        iommu_flush_cache_page(iommu, (void *)phy_addr);
-        phy_addr = virt_to_maddr((void *)phy_addr);
-        set_root_value(*root, phy_addr);
+        set_root_value(*root, maddr);
         set_root_present(*root);
         iommu_flush_cache_entry(iommu, root);
     }
-    phy_addr = (unsigned long) get_context_addr(*root);
-    context = (struct context_entry *)maddr_to_virt(phy_addr);
+    maddr = (u64) get_context_addr(*root);
+    unmap_vtd_domain_page(root_entries);
     spin_unlock_irqrestore(&iommu->lock, flags);
-    return &context[devfn];
+    return maddr;
 }
 
 static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
 {
-    struct root_entry *root;
+    struct root_entry *root, *root_entries;
     struct context_entry *context;
-    unsigned long phy_addr;
+    u64 context_maddr;
     int ret;
     unsigned long flags;
 
     spin_lock_irqsave(&iommu->lock, flags);
-    root = &iommu->root_entry[bus];
+    root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
+    root = &root_entries[bus];
     if ( !root_present(*root) )
     {
         ret = 0;
         goto out;
     }
-    phy_addr = get_context_addr(*root);
-    context = (struct context_entry *)maddr_to_virt(phy_addr);
+    context_maddr = get_context_addr(*root);
+    context = (struct context_entry *)map_vtd_domain_page(context_maddr);
     ret = context_present(context[devfn]);
+    unmap_vtd_domain_page(context);
  out:
+    unmap_vtd_domain_page(root_entries);
     spin_unlock_irqrestore(&iommu->lock, flags);
     return ret;
 }
 
-static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
+static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     int addr_width = agaw_to_width(hd->agaw);
-    struct dma_pte *parent, *pte = NULL, *pgd;
+    struct dma_pte *parent, *pte = NULL;
     int level = agaw_to_level(hd->agaw);
     int offset;
     unsigned long flags;
-    struct page_info *pg = NULL;
+    u64 pte_maddr = 0;
     u64 *vaddr = NULL;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
@@ -257,19 +256,14 @@ static struct page_info *addr_to_dma_pag
 
     addr &= (((u64)1) << addr_width) - 1;
     spin_lock_irqsave(&hd->mapping_lock, flags);
-    if ( !hd->pgd )
-    {
-        pgd = (struct dma_pte *)alloc_xenheap_page();
-        if ( !pgd )
-        {
-            spin_unlock_irqrestore(&hd->mapping_lock, flags);
-            return NULL;
-        }
-        memset(pgd, 0, PAGE_SIZE);
-        hd->pgd = pgd;
-    }
-
-    parent = hd->pgd;
+    if ( hd->pgd_maddr == 0 )
+    {
+        hd->pgd_maddr = alloc_pgtable_maddr();
+        if ( hd->pgd_maddr == 0 )
+            return 0;
+    }
+
+    parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
     while ( level > 1 )
     {
         offset = address_level_offset(addr, level);
@@ -277,18 +271,15 @@ static struct page_info *addr_to_dma_pag
 
         if ( dma_pte_addr(*pte) == 0 )
         {
-            pg = alloc_domheap_page(
-                NULL, MEMF_node(domain_to_node(domain)));
-            vaddr = map_domain_page(page_to_mfn(pg));
+            u64 maddr = alloc_pgtable_maddr();
+            dma_set_pte_addr(*pte, maddr);
+            vaddr = map_vtd_domain_page(maddr);
             if ( !vaddr )
             {
+                unmap_vtd_domain_page(parent);
                 spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return NULL;
+                return 0;
             }
-            memset(vaddr, 0, PAGE_SIZE);
-            iommu_flush_cache_page(iommu, vaddr);
-
-            dma_set_pte_addr(*pte, page_to_maddr(pg));
 
             /*
              * high level table always sets r/w, last level
@@ -300,21 +291,20 @@ static struct page_info *addr_to_dma_pag
         }
         else
         {
-            pg = maddr_to_page(pte->val);
-            vaddr = map_domain_page(page_to_mfn(pg));
+            vaddr = map_vtd_domain_page(pte->val);
             if ( !vaddr )
             {
+                unmap_vtd_domain_page(parent);
                 spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return NULL;
+                return 0;
             }
         }
 
-        if ( parent != hd->pgd )
-            unmap_domain_page(parent);
-
-        if ( level == 2 && vaddr )
+        unmap_vtd_domain_page(parent);
+        if ( level == 2 )
         {
-            unmap_domain_page(vaddr);
+            pte_maddr = pte->val & PAGE_MASK_4K;
+            unmap_vtd_domain_page(vaddr);
             break;
         }
 
@@ -324,43 +314,42 @@ static struct page_info *addr_to_dma_pag
     }
 
     spin_unlock_irqrestore(&hd->mapping_lock, flags);
-    return pg;
+    return pte_maddr;
 }
 
 /* return address's page at specific level */
-static struct page_info *dma_addr_level_page(struct domain *domain,
-                                             u64 addr, int level)
+static u64 dma_addr_level_page_maddr(
+    struct domain *domain, u64 addr, int level)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct dma_pte *parent, *pte = NULL;
     int total = agaw_to_level(hd->agaw);
     int offset;
-    struct page_info *pg = NULL;
-
-    parent = hd->pgd;
+    u64 pg_maddr = hd->pgd_maddr;
+
+    if ( pg_maddr == 0 )
+        return 0;
+
+    parent = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
     while ( level <= total )
     {
         offset = address_level_offset(addr, total);
         pte = &parent[offset];
         if ( dma_pte_addr(*pte) == 0 )
-        {
-            if ( parent != hd->pgd )
-                unmap_domain_page(parent);
             break;
-        }
-
-        pg = maddr_to_page(pte->val);
-        if ( parent != hd->pgd )
-            unmap_domain_page(parent);
+
+        pg_maddr = pte->val & PAGE_MASK_4K;
+        unmap_vtd_domain_page(parent);
 
         if ( level == total )
-            return pg;
-
-        parent = map_domain_page(page_to_mfn(pg));
+            return pg_maddr;
+
+        parent = map_vtd_domain_page(pte->val);
         total--;
     }
 
-    return NULL;
+    unmap_vtd_domain_page(parent);
+    return 0;
 }
 
 static void iommu_flush_write_buffer(struct iommu *iommu)
@@ -639,17 +628,17 @@ static void dma_pte_clear_one(struct dom
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
-    struct dma_pte *pte = NULL;
-    struct page_info *pg = NULL;
+    struct dma_pte *page = NULL, *pte = NULL;
+    u64 pg_maddr;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
 
     /* get last level pte */
-    pg = dma_addr_level_page(domain, addr, 1);
-    if ( !pg )
+    pg_maddr = dma_addr_level_page_maddr(domain, addr, 1);
+    if ( pg_maddr == 0 )
         return;
-    pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
-    pte += address_level_offset(addr, 1);
+    page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+    pte = page + address_level_offset(addr, 1);
     if ( pte )
     {
         dma_clear_pte(*pte);
@@ -665,7 +654,7 @@ static void dma_pte_clear_one(struct dom
                 iommu_flush_write_buffer(iommu);
         }
     }
-    unmap_domain_page(pte);
+    unmap_vtd_domain_page(page);
 }
 
 /* clear last level pte, a tlb flush should be followed */
@@ -695,11 +684,11 @@ void dma_pte_free_pagetable(struct domai
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     struct iommu *iommu;
     int addr_width = agaw_to_width(hd->agaw);
-    struct dma_pte *pte;
+    struct dma_pte *page, *pte;
     int total = agaw_to_level(hd->agaw);
     int level;
     u32 tmp;
-    struct page_info *pg = NULL;
+    u64 pg_maddr;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
@@ -717,15 +706,15 @@ void dma_pte_free_pagetable(struct domai
 
         while ( tmp < end )
         {
-            pg = dma_addr_level_page(domain, tmp, level);
-            if ( !pg )
+            pg_maddr = dma_addr_level_page_maddr(domain, tmp, level);
+            if ( pg_maddr == 0 )
                 return;
-            pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
-            pte += address_level_offset(tmp, level);
+            page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+            pte = page + address_level_offset(tmp, level);
             dma_clear_pte(*pte);
             iommu_flush_cache_entry(iommu, pte);
-            unmap_domain_page(pte);
-            free_domheap_page(pg);
+            unmap_vtd_domain_page(page);
+            free_pgtable_maddr(pg_maddr);
 
             tmp += level_size(level);
         }
@@ -735,17 +724,15 @@ void dma_pte_free_pagetable(struct domai
     /* free pgd */
     if ( start == 0 && end == ((((u64)1) << addr_width) - 1) )
     {
-        free_xenheap_page((void *)hd->pgd);
-        hd->pgd = NULL;
+        free_pgtable_maddr(hd->pgd_maddr);
+        hd->pgd_maddr = 0;
     }
 }
 
 /* iommu handling */
 static int iommu_set_root_entry(struct iommu *iommu)
 {
-    void *addr;
     u32 cmd, sts;
-    struct root_entry *root;
     unsigned long flags;
 
     if ( iommu == NULL )
@@ -755,25 +742,19 @@ static int iommu_set_root_entry(struct i
         return -EINVAL;
     }
 
-    if ( unlikely(!iommu->root_entry) )
-    {
-        root = (struct root_entry *)alloc_xenheap_page();
-        if ( root == NULL )
-            return -ENOMEM;
-
-        memset((u8*)root, 0, PAGE_SIZE);
-        iommu_flush_cache_page(iommu, root);
-
-        if ( cmpxchg((unsigned long *)&iommu->root_entry,
-                     0, (unsigned long)root) != 0 )
-            free_xenheap_page((void *)root);
-    }
-
-    addr = iommu->root_entry;
+    if ( iommu->root_maddr != 0 )
+    {
+        free_pgtable_maddr(iommu->root_maddr);
+        iommu->root_maddr = 0;
+    }
 
     spin_lock_irqsave(&iommu->register_lock, flags);
 
-    dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr));
+    iommu->root_maddr = alloc_pgtable_maddr();
+    if ( iommu->root_maddr == 0 )
+        return -ENOMEM;
+
+    dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
     cmd = iommu->gcmd | DMA_GCMD_SRTP;
     dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
 
@@ -1110,8 +1091,11 @@ static void free_iommu(struct iommu *iom
 {
     if ( !iommu )
         return;
-    if ( iommu->root_entry )
-        free_xenheap_page((void *)iommu->root_entry);
+    if ( iommu->root_maddr != 0 )
+    {
+        free_pgtable_maddr(iommu->root_maddr);
+        iommu->root_maddr = 0;
+    }
     if ( iommu->reg )
         iounmap(iommu->reg);
     free_intel_iommu(iommu->intel);
@@ -1166,13 +1150,17 @@ static int domain_context_mapping_one(
     u8 bus, u8 devfn)
 {
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
-    struct context_entry *context;
+    struct context_entry *context, *context_entries;
     unsigned long flags;
     int ret = 0;
-
-    context = device_to_context_entry(iommu, bus, devfn);
+    u64 maddr;
+
+    maddr = bus_to_context_maddr(iommu, bus);
+    context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
+    context = &context_entries[devfn];
     if ( !context )
     {
+        unmap_vtd_domain_page(context_entries);
         gdprintk(XENLOG_ERR VTDPREFIX,
                  "domain_context_mapping_one:context == NULL:"
                  "bdf = %x:%x:%x\n",
@@ -1182,6 +1170,7 @@ static int domain_context_mapping_one(
 
     if ( context_present(*context) )
     {
+        unmap_vtd_domain_page(context_entries);
         gdprintk(XENLOG_WARNING VTDPREFIX,
                  "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1202,19 +1191,8 @@ static int domain_context_mapping_one(
     else
     {
 #endif
-        if ( !hd->pgd )
-        {
-            struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
-            if ( !pgd )
-            {
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                return -ENOMEM;
-            }
-            memset(pgd, 0, PAGE_SIZE);
-            hd->pgd = pgd;
-        }
- 
-        context_set_address_root(*context, virt_to_maddr(hd->pgd));
+        ASSERT(hd->pgd_maddr != 0);
+        context_set_address_root(*context, hd->pgd_maddr);
         context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
 #ifdef CONTEXT_PASSTHRU
     }
@@ -1226,9 +1204,11 @@ static int domain_context_mapping_one(
 
     gdprintk(XENLOG_INFO VTDPREFIX,
              "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
-             " hd->pgd=%p\n",
+             " hd->pgd_maddr=%"PRIx64"\n",
              bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
-             context->hi, context->lo, hd->pgd);
+             context->hi, context->lo, hd->pgd_maddr);
+
+    unmap_vtd_domain_page(context_entries);
 
     if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
                                     (((u16)bus) << 8) | devfn,
@@ -1389,12 +1369,16 @@ static int domain_context_unmap_one(
     struct iommu *iommu,
     u8 bus, u8 devfn)
 {
-    struct context_entry *context;
+    struct context_entry *context, *context_entries;
     unsigned long flags;
-
-    context = device_to_context_entry(iommu, bus, devfn);
+    u64 maddr;
+
+    maddr = bus_to_context_maddr(iommu, bus);
+    context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
+    context = &context_entries[devfn];
     if ( !context )
     {
+        unmap_vtd_domain_page(context_entries);
         gdprintk(XENLOG_ERR VTDPREFIX,
                  "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
                  bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1403,6 +1387,7 @@ static int domain_context_unmap_one(
 
     if ( !context_present(*context) )
     {
+        unmap_vtd_domain_page(context_entries);
         gdprintk(XENLOG_WARNING VTDPREFIX,
                  "domain_context_unmap_one-%x:%x:%x- "
                  "context NOT present:return\n",
@@ -1420,6 +1405,7 @@ static int domain_context_unmap_one(
     iommu_flush_cache_entry(iommu, context);
     iommu_flush_context_global(iommu, 0);
     iommu_flush_iotlb_global(iommu, 0);
+    unmap_vtd_domain_page(context_entries);
     spin_unlock_irqrestore(&iommu->lock, flags);
 
     return 0;
@@ -1575,36 +1561,7 @@ void iommu_domain_teardown(struct domain
         return;
 
     iommu_domid_release(d);
-
-#if CONFIG_PAGING_LEVELS == 3
-    {
-        struct hvm_iommu *hd  = domain_hvm_iommu(d);
-        int level = agaw_to_level(hd->agaw);
-        struct dma_pte *pgd = NULL;
-
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:
-            if ( hd->pgd )
-                free_xenheap_page((void *)hd->pgd);
-            break;
-        case VTD_PAGE_TABLE_LEVEL_4:
-            if ( hd->pgd )
-            {
-                pgd = hd->pgd;
-                if ( pgd[0].val != 0 )
-                    free_xenheap_page((void*)maddr_to_virt(
-                        dma_pte_addr(pgd[0])));
-                free_xenheap_page((void *)hd->pgd);
-            }
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "Unsupported p2m table sharing level!\n");
-            break;
-        }
-    }
-#endif
+    iommu_free_pgd(d);
     return_devices_to_dom0(d);
 }
 
@@ -1630,8 +1587,8 @@ int intel_iommu_map_page(
 {
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
-    struct dma_pte *pte = NULL;
-    struct page_info *pg = NULL;
+    struct dma_pte *page = NULL, *pte = NULL;
+    u64 pg_maddr;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
@@ -1642,15 +1599,15 @@ int intel_iommu_map_page(
         return 0;
 #endif
 
-    pg = addr_to_dma_page(d, (paddr_t)gfn << PAGE_SHIFT_4K);
-    if ( !pg )
+    pg_maddr = addr_to_dma_page_maddr(d, gfn << PAGE_SHIFT_4K);
+    if ( pg_maddr == 0 )
         return -ENOMEM;
-    pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
-    pte += gfn & LEVEL_MASK;
+    page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+    pte = page + (gfn & LEVEL_MASK);
     dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
     dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
     iommu_flush_cache_entry(iommu, pte);
-    unmap_domain_page(pte);
+    unmap_vtd_domain_page(page);
 
     for_each_drhd_unit ( drhd )
     {
@@ -1690,9 +1647,9 @@ int iommu_page_mapping(struct domain *do
     struct acpi_drhd_unit *drhd;
     struct iommu *iommu;
     unsigned long start_pfn, end_pfn;
-    struct dma_pte *pte = NULL;
+    struct dma_pte *page = NULL, *pte = NULL;
     int index;
-    struct page_info *pg = NULL;
+    u64 pg_maddr;
 
     drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
     iommu = drhd->iommu;
@@ -1705,15 +1662,15 @@ int iommu_page_mapping(struct domain *do
     index = 0;
     while ( start_pfn < end_pfn )
     {
-        pg = addr_to_dma_page(domain, iova + PAGE_SIZE_4K * index);
-        if ( !pg )
+        pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K * index);
+        if ( pg_maddr == 0 )
             return -ENOMEM;
-        pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
-        pte += start_pfn & LEVEL_MASK;
+        page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+        pte = page + (start_pfn & LEVEL_MASK);
         dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
         dma_set_pte_prot(*pte, prot);
         iommu_flush_cache_entry(iommu, pte);
-        unmap_domain_page(pte);
+        unmap_vtd_domain_page(page);
         start_pfn++;
         index++;
     }
@@ -2050,159 +2007,6 @@ int intel_iommu_assign_device(struct dom
 
     return ret;
 }
-
-void iommu_set_pgd(struct domain *d)
-{
-    struct hvm_iommu *hd  = domain_hvm_iommu(d);
-    unsigned long p2m_table;
-
-    if ( hd->pgd )
-    {
-        gdprintk(XENLOG_INFO VTDPREFIX,
-                 "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd);
-        hd->pgd = NULL;
-    }
-    p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
-
-    if ( paging_mode_hap(d) )
-    {
-        int level = agaw_to_level(hd->agaw);
-        struct dma_pte *dpte = NULL;
-        mfn_t pgd_mfn;
-
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:
-            dpte = map_domain_page(p2m_table);
-            if ( !dma_pte_present(*dpte) )
-            {
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "iommu_set_pgd: second level wasn't there\n");
-                unmap_domain_page(dpte);
-                return;
-            }
-            pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
-            unmap_domain_page(dpte);
-            hd->pgd = maddr_to_virt(pagetable_get_paddr(
-                pagetable_from_mfn(pgd_mfn)));
-            break;
-        case VTD_PAGE_TABLE_LEVEL_4:
-            pgd_mfn = _mfn(p2m_table);
-            hd->pgd = maddr_to_virt(pagetable_get_paddr(
-                pagetable_from_mfn(pgd_mfn)));
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
-            break;
-        }
-    }
-    else
-    {
-#if CONFIG_PAGING_LEVELS == 3
-        int level = agaw_to_level(hd->agaw);
-        struct dma_pte *pmd = NULL;
-        struct dma_pte *pgd = NULL;
-        struct dma_pte *pte = NULL;
-        l3_pgentry_t *l3e;
-        unsigned long flags;
-        int i;
-
-        spin_lock_irqsave(&hd->mapping_lock, flags);
-        if ( !hd->pgd )
-        {
-            pgd = (struct dma_pte *)alloc_xenheap_page();
-            if ( !pgd )
-            {
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "Allocate pgd memory failed!\n");
-                return;
-            }
-            memset(pgd, 0, PAGE_SIZE);
-            hd->pgd = pgd;
-       }
-
-        l3e = map_domain_page(p2m_table);
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:        /* Weybridge */
-            /* We only support 8 entries for the PAE L3 p2m table */
-            for ( i = 0; i < 8 ; i++ )
-            {
-                /* Don't create new L2 entry, use ones from p2m table */
-                pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
-            }
-            break;
-
-        case VTD_PAGE_TABLE_LEVEL_4:        /* Stoakley */
-            /* We allocate one more page for the top vtd page table. */
-            pmd = (struct dma_pte *)alloc_xenheap_page();
-            if ( !pmd )
-            {
-                unmap_domain_page(l3e);
-                spin_unlock_irqrestore(&hd->mapping_lock, flags);
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "Allocate pmd memory failed!\n");
-                return;
-            }
-            memset((u8*)pmd, 0, PAGE_SIZE);
-            pte = &pgd[0];
-            dma_set_pte_addr(*pte, virt_to_maddr(pmd));
-            dma_set_pte_readable(*pte);
-            dma_set_pte_writable(*pte);
-
-            for ( i = 0; i < 8; i++ )
-            {
-                /* Don't create new L2 entry, use ones from p2m table */
-                pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
-            }
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
-            break;
-        }
-        unmap_domain_page(l3e);
-        spin_unlock_irqrestore(&hd->mapping_lock, flags);
-#elif CONFIG_PAGING_LEVELS == 4
-        int level = agaw_to_level(hd->agaw);
-        l3_pgentry_t *l3e;
-        mfn_t pgd_mfn;
-
-        switch ( level )
-        {
-        case VTD_PAGE_TABLE_LEVEL_3:
-            l3e = map_domain_page(p2m_table);
-            if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
-            {
-                gdprintk(XENLOG_ERR VTDPREFIX,
-                         "iommu_set_pgd: second level wasn't there\n");
-                unmap_domain_page(l3e);
-                return;
-            }
-            pgd_mfn = _mfn(l3e_get_pfn(*l3e));
-            unmap_domain_page(l3e);
-            hd->pgd = maddr_to_virt(pagetable_get_paddr(
-                pagetable_from_mfn(pgd_mfn)));
-            break;
-
-        case VTD_PAGE_TABLE_LEVEL_4:
-            pgd_mfn = _mfn(p2m_table);
-            hd->pgd = maddr_to_virt(pagetable_get_paddr(
-                pagetable_from_mfn(pgd_mfn)));
-            break;
-        default:
-            gdprintk(XENLOG_ERR VTDPREFIX,
-                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
-            break;
-        }
-#endif
-    }
-    gdprintk(XENLOG_INFO VTDPREFIX,
-             "iommu_set_pgd: hd->pgd = %p\n", hd->pgd);
-}
-
 
 u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
 int iommu_suspend(void)
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h       Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.h       Thu Apr 10 09:22:38 2008 +0100
@@ -425,7 +425,7 @@ extern struct list_head acpi_ioapic_unit
 extern struct list_head acpi_ioapic_units;
 
 struct qi_ctrl {
-    struct qinval_entry *qinval;         /* queue invalidation page */
+    u64 qinval_maddr;  /* queue invalidation page machine address */
     int qinval_index;                    /* queue invalidation index */
     spinlock_t qinval_lock;      /* lock for queue invalidation page */
     spinlock_t qinval_poll_lock; /* lock for queue invalidation poll addr */
@@ -433,7 +433,7 @@ struct qi_ctrl {
 };
 
 struct ir_ctrl {
-    struct iremap_entry *iremap; /* interrupt remap table */
+    u64 iremap_maddr;            /* interrupt remap table machine address */
     int iremap_index;            /* interrupt remap index */
     spinlock_t iremap_lock;      /* lock for irq remappping table */
 };
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c      Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/drivers/passthrough/vtd/qinval.c      Thu Apr 10 09:22:38 2008 +0100
@@ -63,13 +63,14 @@ static int gen_cc_inv_dsc(struct iommu *
 static int gen_cc_inv_dsc(struct iommu *iommu, int index,
     u16 did, u16 source_id, u8 function_mask, u8 granu)
 {
-    u64 *ptr64;
-    unsigned long flags;
-    struct qinval_entry * qinval_entry = NULL;
-    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
-
-    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
-    qinval_entry = &qi_ctrl->qinval[index];
+    unsigned long flags;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
     qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
     qinval_entry->q.cc_inv_dsc.lo.granu = granu;
     qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
@@ -78,9 +79,10 @@ static int gen_cc_inv_dsc(struct iommu *
     qinval_entry->q.cc_inv_dsc.lo.fm = function_mask;
     qinval_entry->q.cc_inv_dsc.lo.res_2 = 0;
     qinval_entry->q.cc_inv_dsc.hi.res = 0;
-    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
-
-    ptr64 = (u64 *)qinval_entry;
+
+    unmap_vtd_domain_page(qinval_entries);
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+
     return 0;
 }
 
@@ -93,7 +95,7 @@ int queue_invalidate_context(struct iomm
 
     spin_lock_irqsave(&iommu->register_lock, flags);
     index = qinval_next_index(iommu);
-    if (index == -1)
+    if ( index == -1 )
         return -EBUSY;
     ret = gen_cc_inv_dsc(iommu, index, did, source_id,
                          function_mask, granu);
@@ -106,14 +108,16 @@ static int gen_iotlb_inv_dsc(struct iomm
     u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
 {
     unsigned long flags;
-    struct qinval_entry * qinval_entry = NULL;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
     if ( index == -1 )
         return -1;
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
 
-    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
     qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
     qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
     qinval_entry->q.iotlb_inv_dsc.lo.dr = 0;
@@ -127,6 +131,7 @@ static int gen_iotlb_inv_dsc(struct iomm
     qinval_entry->q.iotlb_inv_dsc.hi.res_1 = 0;
     qinval_entry->q.iotlb_inv_dsc.hi.addr = addr;
 
+    unmap_vtd_domain_page(qinval_entries);
     spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
     return 0;
 }
@@ -151,15 +156,16 @@ static int gen_wait_dsc(struct iommu *io
 static int gen_wait_dsc(struct iommu *iommu, int index,
     u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
 {
-    u64 *ptr64;
-    unsigned long flags;
-    struct qinval_entry * qinval_entry = NULL;
+    unsigned long flags;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
     if ( index == -1 )
         return -1;
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
-    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
     qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
     qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
     qinval_entry->q.inv_wait_dsc.lo.sw = sw;
@@ -168,8 +174,8 @@ static int gen_wait_dsc(struct iommu *io
     qinval_entry->q.inv_wait_dsc.lo.sdata = sdata;
     qinval_entry->q.inv_wait_dsc.hi.res_1 = 0;
     qinval_entry->q.inv_wait_dsc.hi.saddr = virt_to_maddr(saddr) >> 2;
-    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
-    ptr64 = (u64 *)qinval_entry;
+    unmap_vtd_domain_page(qinval_entries);
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
     return 0;
 }
 
@@ -185,7 +191,7 @@ static int queue_invalidate_wait(struct 
     spin_lock_irqsave(&qi_ctrl->qinval_poll_lock, flags);
     spin_lock_irqsave(&iommu->register_lock, flags);
     index = qinval_next_index(iommu);
-    if (*saddr == 1)
+    if ( *saddr == 1 )
         *saddr = 0;
     ret = gen_wait_dsc(iommu, index, iflag, sw, fn, sdata, saddr);
     ret |= qinval_update_qtail(iommu, index);
@@ -196,8 +202,10 @@ static int queue_invalidate_wait(struct 
     {
         /* In case all wait descriptor writes to same addr with same data */
         start_time = jiffies;
-        while ( *saddr != 1 ) {
-            if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT)) {
+        while ( *saddr != 1 )
+        {
+            if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
+            {
                 print_qi_regs(iommu);
                 panic("queue invalidate wait descriptor was not executed\n");
             }
@@ -213,7 +221,7 @@ int invalidate_sync(struct iommu *iommu)
     int ret = -1;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
-    if (qi_ctrl->qinval)
+    if ( qi_ctrl->qinval_maddr != 0 )
     {
         ret = queue_invalidate_wait(iommu,
             0, 1, 1, 1, &qi_ctrl->qinval_poll_status);
@@ -226,14 +234,16 @@ static int gen_dev_iotlb_inv_dsc(struct 
     u32 max_invs_pend, u16 sid, u16 size, u64 addr)
 {
     unsigned long flags;
-    struct qinval_entry * qinval_entry = NULL;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
     if ( index == -1 )
         return -1;
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
 
-    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
     qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
     qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
     qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
@@ -244,6 +254,7 @@ static int gen_dev_iotlb_inv_dsc(struct 
     qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
     qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr;
 
+    unmap_vtd_domain_page(qinval_entries);
     spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
     return 0;
 }
@@ -268,14 +279,16 @@ static int gen_iec_inv_dsc(struct iommu 
     u8 granu, u8 im, u16 iidx)
 {
     unsigned long flags;
-    struct qinval_entry * qinval_entry = NULL;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
     if ( index == -1 )
         return -1;
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
 
-    qinval_entry = &qi_ctrl->qinval[index];
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
     qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
     qinval_entry->q.iec_inv_dsc.lo.granu = granu;
     qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
@@ -284,6 +297,7 @@ static int gen_iec_inv_dsc(struct iommu 
     qinval_entry->q.iec_inv_dsc.lo.res_2 = 0;
     qinval_entry->q.iec_inv_dsc.hi.res = 0;
 
+    unmap_vtd_domain_page(qinval_entries);
     spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
     return 0;
 }
@@ -349,7 +363,7 @@ static int flush_context_qi(
             did = 0;
     }
 
-    if (qi_ctrl->qinval)
+    if ( qi_ctrl->qinval_maddr != 0 )
     {
         ret = queue_invalidate_context(iommu, did, sid, fm,
                                        type >> DMA_CCMD_INVL_GRANU_OFFSET);
@@ -382,7 +396,8 @@ static int flush_iotlb_qi(
             did = 0;
     }
 
-    if (qi_ctrl->qinval) {
+    if ( qi_ctrl->qinval_maddr != 0 )
+    {
         /* use queued invalidation */
         if (cap_write_drain(iommu->cap))
             dw = 1;
@@ -400,7 +415,6 @@ int qinval_setup(struct iommu *iommu)
 int qinval_setup(struct iommu *iommu)
 {
     unsigned long start_time;
-    u64 paddr;
     u32 status = 0;
     struct qi_ctrl *qi_ctrl;
     struct iommu_flush *flush;
@@ -411,15 +425,14 @@ int qinval_setup(struct iommu *iommu)
     if ( !ecap_queued_inval(iommu->ecap) )
         return -ENODEV;
 
-    if (qi_ctrl->qinval == NULL) {
-        qi_ctrl->qinval = alloc_xenheap_page();
-        if (qi_ctrl->qinval == NULL)
-            panic("Cannot allocate memory for qi_ctrl->qinval\n");
-        memset((u8*)qi_ctrl->qinval, 0, PAGE_SIZE_4K);
+    if ( qi_ctrl->qinval_maddr == 0 )
+    {
+        qi_ctrl->qinval_maddr = alloc_pgtable_maddr();
+        if ( qi_ctrl->qinval_maddr == 0 )
+            panic("Cannot allocate memory for qi_ctrl->qinval_maddr\n");
         flush->context = flush_context_qi;
         flush->iotlb = flush_iotlb_qi;
     }
-    paddr = virt_to_maddr(qi_ctrl->qinval);
 
     /* Setup Invalidation Queue Address(IQA) register with the
      * address of the page we just allocated.  QS field at
@@ -428,7 +441,7 @@ int qinval_setup(struct iommu *iommu)
      * registers are automatically reset to 0 with write
      * to IQA register.
      */
-    dmar_writeq(iommu->reg, DMAR_IQA_REG, paddr);
+    dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
 
     /* enable queued invalidation hardware */
     iommu->gcmd |= DMA_GCMD_QIE;
@@ -436,11 +449,12 @@ int qinval_setup(struct iommu *iommu)
 
     /* Make sure hardware complete it */
     start_time = jiffies;
-    while (1) {
+    while ( 1 )
+    {
         status = dmar_readl(iommu->reg, DMAR_GSTS_REG);
-        if (status & DMA_GSTS_QIES)
+        if ( status & DMA_GSTS_QIES )
             break;
-        if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
+        if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
             panic("Cannot set QIE field for queue invalidation\n");
         cpu_relax();
     }
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/drivers/passthrough/vtd/utils.c
--- a/xen/drivers/passthrough/vtd/utils.c       Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/drivers/passthrough/vtd/utils.c       Thu Apr 10 09:22:38 2008 +0100
@@ -25,6 +25,7 @@
 #include "../pci-direct.h"
 #include "../pci_regs.h"
 #include "msi.h"
+#include "vtd.h"
 
 #define INTEL   0x8086
 #define SEABURG 0x4000
@@ -243,7 +244,7 @@ u32 get_level_index(unsigned long gmfn, 
 }
 
 void print_vtd_entries(
-    struct domain *d, 
+    struct domain *d,
     struct iommu *iommu,
     int bus, int devfn,
     unsigned long gmfn)
@@ -261,37 +262,40 @@ void print_vtd_entries(
     printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x gmfn = %lx\n",
            d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn);
 
-    if ( hd->pgd == NULL )
-    {
-        printk("    hg->pgd == NULL\n");
+    if ( hd->pgd_maddr == 0 )
+    {
+        printk("    hd->pgd_maddr == 0\n");
         return;
     }
-    printk("    d->pgd = %p virt_to_maddr(hd->pgd) = %lx\n",
-           hd->pgd, virt_to_maddr(hd->pgd));
+    printk("    hd->pgd_maddr = %"PRIx64"\n", hd->pgd_maddr);
 
     for_each_drhd_unit ( drhd )
     {
         printk("---- print_vtd_entries %d ----\n", i++);
 
-        root_entry = iommu->root_entry;
-        if ( root_entry == NULL )
-        {
-            printk("    root_entry == NULL\n");
-            continue;
-        }
-
+        if ( iommu->root_maddr == 0 )
+        {
+            printk("    iommu->root_maddr = 0\n");
+            continue;
+        }
+
+        root_entry =
+            (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
+ 
         printk("    root_entry = %p\n", root_entry);
         printk("    root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val);
         if ( !root_present(root_entry[bus]) )
         {
+            unmap_vtd_domain_page(root_entry);
             printk("    root_entry[%x] not present\n", bus);
             continue;
         }
 
         ctxt_entry =
-            maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT);
+            (struct context_entry *)map_vtd_domain_page(root_entry[bus].val);
         if ( ctxt_entry == NULL )
         {
+            unmap_vtd_domain_page(root_entry);
             printk("    ctxt_entry == NULL\n");
             continue;
         }
@@ -301,6 +305,8 @@ void print_vtd_entries(
                devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
         if ( !context_present(ctxt_entry[devfn]) )
         {
+            unmap_vtd_domain_page(ctxt_entry);
+            unmap_vtd_domain_page(root_entry);
             printk("    ctxt_entry[%x] not present\n", devfn);
             continue;
         }
@@ -308,6 +314,8 @@ void print_vtd_entries(
         if ( level != VTD_PAGE_TABLE_LEVEL_3 &&
              level != VTD_PAGE_TABLE_LEVEL_4)
         {
+            unmap_vtd_domain_page(ctxt_entry);
+            unmap_vtd_domain_page(root_entry);
             printk("Unsupported VTD page table level (%d)!\n", level);
             continue;
         }
@@ -319,6 +327,8 @@ void print_vtd_entries(
             printk("    l%d = %p\n", level, l);
             if ( l == NULL )
             {
+                unmap_vtd_domain_page(ctxt_entry);
+                unmap_vtd_domain_page(root_entry);
                 printk("    l%d == NULL\n", level);
                 break;
             }
@@ -329,6 +339,8 @@ void print_vtd_entries(
             pte.val = l[l_index];
             if ( !dma_pte_present(pte) )
             {
+                unmap_vtd_domain_page(ctxt_entry);
+                unmap_vtd_domain_page(root_entry);
                 printk("    l%d[%x] not present\n", level, l_index);
                 break;
             }
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/drivers/passthrough/vtd/x86/vtd.c
--- a/xen/drivers/passthrough/vtd/x86/vtd.c     Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c     Thu Apr 10 09:22:38 2008 +0100
@@ -20,6 +20,7 @@
 
 #include <xen/sched.h>
 #include <xen/domain_page.h>
+#include <asm/paging.h>
 #include <xen/iommu.h>
 #include "../iommu.h"
 #include "../dmar.h"
@@ -124,3 +125,179 @@ void hvm_dpci_isairq_eoi(struct domain *
         }
     }
 }
+
+void iommu_set_pgd(struct domain *d)
+{
+    struct hvm_iommu *hd  = domain_hvm_iommu(d);
+    unsigned long p2m_table;
+    int level = agaw_to_level(hd->agaw);
+    l3_pgentry_t *l3e;
+
+    p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
+
+    if ( paging_mode_hap(d) )
+    {
+        int level = agaw_to_level(hd->agaw);
+        struct dma_pte *dpte = NULL;
+        mfn_t pgd_mfn;
+
+        switch ( level )
+        {
+        case VTD_PAGE_TABLE_LEVEL_3:
+            dpte = map_domain_page(p2m_table);
+            if ( !dma_pte_present(*dpte) )
+            {
+                gdprintk(XENLOG_ERR VTDPREFIX,
+                         "iommu_set_pgd: second level wasn't there\n");
+                unmap_domain_page(dpte);
+                return;
+            }
+            pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
+            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            unmap_domain_page(dpte);
+            break;
+        case VTD_PAGE_TABLE_LEVEL_4:
+            pgd_mfn = _mfn(p2m_table);
+            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            break;
+        default:
+            gdprintk(XENLOG_ERR VTDPREFIX,
+                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
+            break;
+        }
+    }
+    else
+    {
+#if CONFIG_PAGING_LEVELS == 3
+        struct dma_pte *pte = NULL, *pgd_vaddr = NULL, *pmd_vaddr = NULL;
+        int i;
+        u64 pmd_maddr;
+        unsigned long flags;
+
+        spin_lock_irqsave(&hd->mapping_lock, flags);
+        hd->pgd_maddr = alloc_pgtable_maddr();
+        if ( hd->pgd_maddr == 0 )
+        {
+            spin_unlock_irqrestore(&hd->mapping_lock, flags);
+            gdprintk(XENLOG_ERR VTDPREFIX,
+                     "Allocate pgd memory failed!\n");
+            return;
+        }
+
+        pgd_vaddr = map_vtd_domain_page(hd->pgd_maddr);
+        l3e = map_domain_page(p2m_table);
+        switch ( level )
+        {
+        case VTD_PAGE_TABLE_LEVEL_3:        /* Weybridge */
+            /* We only support 8 entries for the PAE L3 p2m table */
+            for ( i = 0; i < 8 ; i++ )
+            {
+                /* Don't create new L2 entry, use ones from p2m table */
+                pgd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
+            }
+            break;
+
+        case VTD_PAGE_TABLE_LEVEL_4:        /* Stoakley */
+            /* We allocate one more page for the top vtd page table. */
+            pmd_maddr = alloc_pgtable_maddr();
+            if ( pmd_maddr == 0 )
+            {
+                unmap_vtd_domain_page(pgd_vaddr);
+                unmap_domain_page(l3e);
+                spin_unlock_irqrestore(&hd->mapping_lock, flags);
+                gdprintk(XENLOG_ERR VTDPREFIX,
+                         "Allocate pmd memory failed!\n");
+                return;
+            }
+
+            pte = &pgd_vaddr[0];
+            dma_set_pte_addr(*pte, pmd_maddr);
+            dma_set_pte_readable(*pte);
+            dma_set_pte_writable(*pte);
+
+            pmd_vaddr = map_vtd_domain_page(pmd_maddr);
+            for ( i = 0; i < 8; i++ )
+            {
+                /* Don't create new L2 entry, use ones from p2m table */
+                pmd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
+            }
+
+            unmap_vtd_domain_page(pmd_vaddr);
+            break;
+        default:
+            gdprintk(XENLOG_ERR VTDPREFIX,
+                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
+            break;
+        }
+
+        unmap_vtd_domain_page(pgd_vaddr);
+        unmap_domain_page(l3e);
+        spin_unlock_irqrestore(&hd->mapping_lock, flags);
+
+#elif CONFIG_PAGING_LEVELS == 4
+        mfn_t pgd_mfn;
+
+        switch ( level )
+        {
+        case VTD_PAGE_TABLE_LEVEL_3:
+            l3e = map_domain_page(p2m_table);
+            if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
+            {
+                gdprintk(XENLOG_ERR VTDPREFIX,
+                         "iommu_set_pgd: second level wasn't there\n");
+                unmap_domain_page(l3e);
+                return;
+            }
+
+            pgd_mfn = _mfn(l3e_get_pfn(*l3e));
+            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            unmap_domain_page(l3e);
+            break;
+        case VTD_PAGE_TABLE_LEVEL_4:
+            pgd_mfn = _mfn(p2m_table);
+            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+            break;
+        default:
+            gdprintk(XENLOG_ERR VTDPREFIX,
+                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
+            break;
+        }
+#endif
+    }
+}
+
+void iommu_free_pgd(struct domain *d)
+{
+#if CONFIG_PAGING_LEVELS == 3
+    struct hvm_iommu *hd  = domain_hvm_iommu(d);
+    int level = agaw_to_level(hd->agaw);
+    struct dma_pte *pgd_vaddr = NULL;
+
+    switch ( level )
+    {
+    case VTD_PAGE_TABLE_LEVEL_3:
+        if ( hd->pgd_maddr != 0 )
+        {
+            free_pgtable_maddr(hd->pgd_maddr);
+            hd->pgd_maddr = 0;
+        }
+        break;
+    case VTD_PAGE_TABLE_LEVEL_4:
+        if ( hd->pgd_maddr != 0 )
+        {
+            pgd_vaddr = (struct dma_pte*)map_vtd_domain_page(hd->pgd_maddr);
+            if ( pgd_vaddr[0].val != 0 )
+                free_pgtable_maddr(pgd_vaddr[0].val);
+            unmap_vtd_domain_page(pgd_vaddr);
+            free_pgtable_maddr(hd->pgd_maddr);
+            hd->pgd_maddr = 0;
+        }
+        break;
+    default:
+        gdprintk(XENLOG_ERR VTDPREFIX,
+                 "Unsupported p2m table sharing level!\n");
+        break;
+    }
+#endif
+}
+
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/include/xen/hvm/iommu.h
--- a/xen/include/xen/hvm/iommu.h       Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/include/xen/hvm/iommu.h       Thu Apr 10 09:22:38 2008 +0100
@@ -38,7 +38,7 @@ struct hvm_iommu {
 struct hvm_iommu {
     spinlock_t iommu_list_lock;    /* protect iommu specific lists */
     struct list_head pdev_list;    /* direct accessed pci devices */
-    struct dma_pte *pgd;           /* io page directory root */
+    u64 pgd_maddr;                 /* io page directory machine address */
     spinlock_t mapping_lock;       /* io page table lock */
     int agaw;     /* adjusted guest address width, 0 is level 2 30-bit */
     struct list_head g2m_ioport_list;  /* guest to machine ioport mapping */
diff -r 85848be18ba2 -r 1d3aaa6a8b87 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h   Thu Apr 10 09:20:07 2008 +0100
+++ b/xen/include/xen/iommu.h   Thu Apr 10 09:22:38 2008 +0100
@@ -67,7 +67,7 @@ struct iommu {
     u64        ecap;
     spinlock_t lock; /* protect context, domain ids */
     spinlock_t register_lock; /* protect iommu register handling */
-    struct root_entry *root_entry; /* virtual address */
+    u64 root_maddr; /* root entry machine address */
     unsigned int vector;
     struct intel_iommu *intel;
 };
@@ -85,6 +85,7 @@ int iommu_unmap_page(struct domain *d, u
 int iommu_unmap_page(struct domain *d, unsigned long gfn);
 void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry);
 void iommu_set_pgd(struct domain *d);
+void iommu_free_pgd(struct domain *d);
 void iommu_domain_teardown(struct domain *d);
 int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq);
 int dpci_ioport_intercept(ioreq_t *p);
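
After these hunks, none of the VT-d code keeps a long-lived virtual pointer
into its tables: presence is tested by comparing the stored machine address
against 0, and each access maps the page, touches the entry and unmaps it
again, with the relevant spinlock held as shown above.  A hedged caller-side
sketch of that pattern, using only the map_vtd_domain_page()/
unmap_vtd_domain_page() helpers and dma_pte accessors visible in the diff;
example_set_pte_rw() and its parameters are illustrative, not part of the
changeset:

/* Illustrative caller only: map on demand, touch the entry, unmap. */
static void example_set_pte_rw(u64 table_maddr, unsigned int index)
{
    struct dma_pte *table, *pte;

    /* Tables are now tested against a zero machine address, not NULL. */
    if ( table_maddr == 0 )
        return;

    table = (struct dma_pte *)map_vtd_domain_page(table_maddr);
    pte = &table[index];
    dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
    unmap_vtd_domain_page(table);
}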
