
[Xen-changelog] [xen-unstable] amd iommu: dynamic page table depth adjustment.



# HG changeset patch
# User Wei Wang <wei.wang2@xxxxxxx>
# Date 1297241832 0
# Node ID 3c5990fabb79cbc73fcc4d589ef5b54618aaf739
# Parent  9f96906ec72452390180c30ea96f3d3006943040
amd iommu: dynamic page table depth adjustment.

I/O page table growth is triggered by amd_iommu_map_page and extends
the table upward by adding new root levels as needed. I have tested it
with different devices (NIC and GFX) and different guests (Linux and
Win7) using different guest memory sizes (512M, 1G, 4G and above).

Signed-off-by: Wei Wang <wei.wang2@xxxxxxx>
---
 xen/drivers/passthrough/amd/iommu_map.c      |  107 +++++++++++++++++++++++++++
 xen/drivers/passthrough/amd/pci_amd_iommu.c  |    8 +-
 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h |    2 
 3 files changed, 111 insertions(+), 6 deletions(-)
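
As a rough standalone illustration of the growth condition checked by
update_paging_mode() below (a minimal sketch, not part of the patch;
the helper name is illustrative only, and it assumes the usual
512-entry AMD IOMMU tables, i.e. PTE_PER_TABLE_SHIFT of 9 and
PTE_PER_TABLE_SIZE of 512):

/* Sketch only (illustrative helper, not a Xen function): compute the
 * paging mode needed so that 'gfn' is addressable, mirroring the while
 * loop in update_paging_mode().  Assumes 4KB pages and 512-entry
 * (9-bit) tables per level. */
#include <stdio.h>

#define PTE_PER_TABLE_SHIFT 9
#define PTE_PER_TABLE_SIZE  (1UL << PTE_PER_TABLE_SHIFT)

static unsigned int needed_paging_mode(unsigned long gfn, unsigned int level)
{
    /* Offset of 'gfn' within the current top-level table. */
    unsigned long offset = gfn >> (PTE_PER_TABLE_SHIFT * (level - 1));

    /* While the offset does not fit into one table, add another level. */
    while ( offset >= PTE_PER_TABLE_SIZE )
    {
        level++;
        offset >>= PTE_PER_TABLE_SHIFT;
    }
    return level;
}

int main(void)
{
    /* A 2-level table covers 512 * 512 * 4KB = 1GB, so the first gfn at
     * or above the 1GB boundary forces growth to a third level. */
    printf("%u\n", needed_paging_mode(0x3ffff, 2)); /* prints 2 */
    printf("%u\n", needed_paging_mode(0x40000, 2)); /* prints 3 */
    return 0;
}

Growing the table upward like this leaves the existing lower-level
tables in place; only a new root is allocated and chained on top of the
old one, which is what the while loop in the first hunk below does.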

diff -r 9f96906ec724 -r 3c5990fabb79 xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c   Wed Feb 09 08:54:37 2011 +0000
+++ b/xen/drivers/passthrough/amd/iommu_map.c   Wed Feb 09 08:57:12 2011 +0000
@@ -472,6 +472,89 @@ static u64 iommu_l2e_from_pfn(struct pag
     return next_table_maddr;
 }
 
+static int update_paging_mode(struct domain *d, unsigned long gfn)
+{
+    u16 bdf;
+    void *device_entry;
+    unsigned int req_id, level, offset;
+    unsigned long flags;
+    struct pci_dev *pdev;
+    struct amd_iommu *iommu = NULL;
+    struct page_info *new_root = NULL;
+    struct page_info *old_root = NULL;
+    void *new_root_vaddr;
+    u64 old_root_maddr;
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+
+    level = hd->paging_mode;
+    old_root = hd->root_table;
+    offset = gfn >> (PTE_PER_TABLE_SHIFT * (level - 1));
+
+    ASSERT(spin_is_locked(&hd->mapping_lock) && is_hvm_domain(d));
+
+    while ( offset >= PTE_PER_TABLE_SIZE )
+    {
+        /* Allocate and install a new root table. Only the upper I/O page
+         * table levels grow, so existing entries need no next-level fixup. */
+        new_root = alloc_amd_iommu_pgtable();
+        if ( new_root == NULL )
+        {
+            AMD_IOMMU_DEBUG("%s Cannot allocate I/O page table\n",
+                            __func__);
+            return -ENOMEM;
+        }
+
+        new_root_vaddr = __map_domain_page(new_root);
+        old_root_maddr = page_to_maddr(old_root);
+        amd_iommu_set_page_directory_entry((u32 *)new_root_vaddr,
+                                           old_root_maddr, level);
+        level++;
+        old_root = new_root;
+        offset >>= PTE_PER_TABLE_SHIFT;
+    }
+
+    if ( new_root != NULL )
+    {
+        hd->paging_mode = level;
+        hd->root_table = new_root;
+
+        if ( !spin_is_locked(&pcidevs_lock) )
+            AMD_IOMMU_DEBUG("%s Accessing pdev_list "
+                            "without acquiring pcidevs_lock.\n", __func__);
+
+        /* Update device table entries using new root table and paging mode */
+        for_each_pdev( d, pdev )
+        {
+            bdf = (pdev->bus << 8) | pdev->devfn;
+            req_id = get_dma_requestor_id(bdf);
+            iommu = find_iommu_for_device(bdf);
+            if ( !iommu )
+            {
+                AMD_IOMMU_DEBUG("%s Failed to find iommu.\n", __func__);
+                return -ENODEV;
+            }
+
+            spin_lock_irqsave(&iommu->lock, flags);
+            device_entry = iommu->dev_table.buffer +
+                           (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+
+            /* valid = 0 only works for dom0 passthrough mode */
+            amd_iommu_set_root_page_table((u32 *)device_entry,
+                                          page_to_maddr(hd->root_table),
+                                          hd->domain_id,
+                                          hd->paging_mode, 1);
+
+            invalidate_dev_table_entry(iommu, req_id);
+            flush_command_buffer(iommu);
+            spin_unlock_irqrestore(&iommu->lock, flags);
+        }
+
+        /* For safety, invalidate all entries */
+        invalidate_all_iommu_pages(d);
+    }
+    return 0;
+}
+
 int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
                        unsigned int flags)
 {
@@ -481,6 +564,18 @@ int amd_iommu_map_page(struct domain *d,
     BUG_ON( !hd->root_table );
 
     spin_lock(&hd->mapping_lock);
+
+    /* Since HVM domains are initialized with a 2-level I/O page table,
+     * a deeper page table might be needed now for a larger gfn. */
+    if ( is_hvm_domain(d) )
+    {
+        if ( update_paging_mode(d, gfn) )
+        {
+            AMD_IOMMU_DEBUG("Updating paging mode failed, gfn = %lx\n", gfn);
+            domain_crash(d);
+            return -EFAULT;
+        }
+    }
 
     iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
     if ( iommu_l2e == 0 )
@@ -509,6 +604,18 @@ int amd_iommu_unmap_page(struct domain *
     BUG_ON( !hd->root_table );
 
     spin_lock(&hd->mapping_lock);
+
+    /* Since HVM domains are initialized with a 2-level I/O page table,
+     * a deeper page table might be needed now for a larger gfn. */
+    if ( is_hvm_domain(d) )
+    {
+        if ( update_paging_mode(d, gfn) )
+        {
+            AMD_IOMMU_DEBUG("Updating paging mode failed, gfn = %lx\n", gfn);
+            domain_crash(d);
+            return -EFAULT;
+        }
+    }
 
     iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
 
diff -r 9f96906ec724 -r 3c5990fabb79 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c       Wed Feb 09 08:54:37 2011 +0000
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c       Wed Feb 09 08:57:12 2011 +0000
@@ -214,8 +214,11 @@ static int amd_iommu_domain_init(struct 
         return -ENOMEM;
     }
 
+    /* For PV guests and dom0, stick with get_paging_mode(max_page).
+     * For HVM domains, start with a 2-level page table. */
     hd->paging_mode = is_hvm_domain(d) ?
-        IOMMU_PAGE_TABLE_LEVEL_4 : get_paging_mode(max_page);
+                      IOMMU_PAGING_MODE_LEVEL_2 :
+                      get_paging_mode(max_page);
 
     hd->domain_id = d->domain_id;
 
@@ -297,9 +300,6 @@ static int reassign_device( struct domai
 
     list_move(&pdev->domain_list, &target->arch.pdev_list);
     pdev->domain = target;
-
-    if ( target->max_pages > 0 )
-        t->paging_mode = get_paging_mode(target->max_pages);
 
     /* IO page tables might be destroyed after pci-detach the last device
      * In this case, we have to re-allocate root table for next pci-attach.*/
diff -r 9f96906ec724 -r 3c5990fabb79 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h      Wed Feb 09 08:54:37 2011 +0000
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h      Wed Feb 09 08:57:12 2011 +0000
@@ -386,8 +386,6 @@
 #define IOMMU_PAGES                 (MMIO_PAGES_PER_IOMMU * MAX_AMD_IOMMUS)
 #define DEFAULT_DOMAIN_ADDRESS_WIDTH    48
 #define MAX_AMD_IOMMUS                  32
-#define IOMMU_PAGE_TABLE_LEVEL_3        3
-#define IOMMU_PAGE_TABLE_LEVEL_4        4
 
 /* interrupt remapping table */
 #define INT_REMAP_INDEX_DM_MASK         0x1C00
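
For reference, the guest address space covered by each paging mode (and
hence when the initial IOMMU_PAGING_MODE_LEVEL_2 table set up in
amd_iommu_domain_init() has to grow) can be worked out with a small
standalone sketch like the one below; coverage() is an illustrative
helper, and the 4KB page size and 9 bits per level are assumptions
matching the constants used in the patch, not something the patch
defines:

/* Sketch only (illustrative helper, not a Xen function): bytes of
 * guest address space addressable by an AMD IOMMU page table of the
 * given paging mode (number of levels).  Assumes 4KB pages and
 * 512-entry (9-bit) tables per level. */
#include <stdio.h>

#define PAGE_SHIFT          12
#define PTE_PER_TABLE_SHIFT 9

static unsigned long long coverage(unsigned int paging_mode)
{
    return 1ULL << (PAGE_SHIFT + PTE_PER_TABLE_SHIFT * paging_mode);
}

int main(void)
{
    /* Level 2 covers 1GB, level 3 covers 512GB, level 4 covers 256TB;
     * a guest whose mappings stay below 1GB keeps the initial 2 levels,
     * while any mapping at or above 1GB grows the table to 3 levels. */
    for ( unsigned int mode = 2; mode <= 4; mode++ )
        printf("paging mode %u covers %llu GB\n",
               mode, coverage(mode) >> 30);
    return 0;
}

At 4 levels the coverage matches the 48-bit DEFAULT_DOMAIN_ADDRESS_WIDTH
visible in the hunk above.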
