[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen staging] AMD/IOMMU: allow use of superpage mappings



commit 4b7c48b4ba0e9b0c6ab3d680112d011e1767ee71
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Mon Jul 25 15:35:40 2022 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Mon Jul 25 15:35:40 2022 +0200

    AMD/IOMMU: allow use of superpage mappings
    
    No separate feature flags exist which would control availability of
    these; the only restriction is HATS (establishing the maximum number of
    page table levels in general), and even that has a lower bound of 4.
    Thus we can unconditionally announce 2M and 1G mappings. (Via non-
    default page sizes the implementation in principle permits arbitrary
    size mappings, but these require multiple identical leaf PTEs to be
    written, which isn't all that different from having to write multiple
    consecutive PTEs with increasing frame numbers. IMO that's therefore
    beneficial only on hardware where suitable TLBs exist; I'm unaware of
    such hardware.)
    
    Note that in principle 512G and 256T mappings could also be supported
    right away, but the freeing of page tables (to be introduced in
    subsequent patches) when replacing a sufficiently populated tree with a
    single huge page would need suitable preemption, which will require
    extra work.
    
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Reviewed-by: Paul Durrant <paul@xxxxxxx>
---
 xen/drivers/passthrough/amd/iommu_map.c     | 56 +++++++++++++++++++++++++----
 xen/drivers/passthrough/amd/pci_amd_iommu.c |  2 +-
 xen/include/xen/page-defs.h                 | 10 ++++++
 3 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/xen/drivers/passthrough/amd/iommu_map.c 
b/xen/drivers/passthrough/amd/iommu_map.c
index 28da5cc85e..fa8fd6f800 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -32,12 +32,13 @@ static unsigned int pfn_to_pde_idx(unsigned long pfn, 
unsigned int level)
 }
 
 static union amd_iommu_pte clear_iommu_pte_present(unsigned long l1_mfn,
-                                                   unsigned long dfn)
+                                                   unsigned long dfn,
+                                                   unsigned int level)
 {
     union amd_iommu_pte *table, *pte, old;
 
     table = map_domain_page(_mfn(l1_mfn));
-    pte = &table[pfn_to_pde_idx(dfn, 1)];
+    pte = &table[pfn_to_pde_idx(dfn, level)];
     old = *pte;
 
     write_atomic(&pte->raw, 0);
@@ -351,15 +352,39 @@ static int iommu_pde_from_dfn(struct domain *d, unsigned 
long dfn,
     return 0;
 }
 
+static void queue_free_pt(struct domain_iommu *hd, mfn_t mfn, unsigned int 
level)
+{
+    if ( level > 1 )
+    {
+        union amd_iommu_pte *pt = map_domain_page(mfn);
+        unsigned int i;
+
+        for ( i = 0; i < PTE_PER_TABLE_SIZE; ++i )
+            if ( pt[i].pr && pt[i].next_level )
+            {
+                ASSERT(pt[i].next_level < level);
+                queue_free_pt(hd, _mfn(pt[i].mfn), pt[i].next_level);
+            }
+
+        unmap_domain_page(pt);
+    }
+
+    iommu_queue_free_pgtable(hd, mfn_to_page(mfn));
+}
+
 int cf_check amd_iommu_map_page(
     struct domain *d, dfn_t dfn, mfn_t mfn, unsigned int flags,
     unsigned int *flush_flags)
 {
     struct domain_iommu *hd = dom_iommu(d);
+    unsigned int level = (IOMMUF_order(flags) / PTE_PER_TABLE_SHIFT) + 1;
     int rc;
     unsigned long pt_mfn = 0;
     union amd_iommu_pte old;
 
+    ASSERT((hd->platform_ops->page_sizes >> IOMMUF_order(flags)) &
+           PAGE_SIZE_4K);
+
     spin_lock(&hd->arch.mapping_lock);
 
     /*
@@ -384,7 +409,7 @@ int cf_check amd_iommu_map_page(
         return rc;
     }
 
-    if ( iommu_pde_from_dfn(d, dfn_x(dfn), 1, &pt_mfn, flush_flags, true) ||
+    if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn, flush_flags, true) 
||
          !pt_mfn )
     {
         spin_unlock(&hd->arch.mapping_lock);
@@ -394,8 +419,8 @@ int cf_check amd_iommu_map_page(
         return -EFAULT;
     }
 
-    /* Install 4k mapping */
-    old = set_iommu_pte_present(pt_mfn, dfn_x(dfn), mfn_x(mfn), 1,
+    /* Install mapping */
+    old = set_iommu_pte_present(pt_mfn, dfn_x(dfn), mfn_x(mfn), level,
                                 (flags & IOMMUF_writable),
                                 (flags & IOMMUF_readable));
 
@@ -403,8 +428,13 @@ int cf_check amd_iommu_map_page(
 
     *flush_flags |= IOMMU_FLUSHF_added;
     if ( old.pr )
+    {
         *flush_flags |= IOMMU_FLUSHF_modified;
 
+        if ( IOMMUF_order(flags) && old.next_level )
+            queue_free_pt(hd, _mfn(old.mfn), old.next_level);
+    }
+
     return 0;
 }
 
@@ -413,8 +443,15 @@ int cf_check amd_iommu_unmap_page(
 {
     unsigned long pt_mfn = 0;
     struct domain_iommu *hd = dom_iommu(d);
+    unsigned int level = (order / PTE_PER_TABLE_SHIFT) + 1;
     union amd_iommu_pte old = {};
 
+    /*
+     * While really we could unmap at any granularity, for now we assume unmaps
+     * are issued by common code only at the same granularity as maps.
+     */
+    ASSERT((hd->platform_ops->page_sizes >> order) & PAGE_SIZE_4K);
+
     spin_lock(&hd->arch.mapping_lock);
 
     if ( !hd->arch.amd.root_table )
@@ -423,7 +460,7 @@ int cf_check amd_iommu_unmap_page(
         return 0;
     }
 
-    if ( iommu_pde_from_dfn(d, dfn_x(dfn), 1, &pt_mfn, flush_flags, false) )
+    if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn, flush_flags, false) 
)
     {
         spin_unlock(&hd->arch.mapping_lock);
         AMD_IOMMU_ERROR("invalid IO pagetable entry dfn = %"PRI_dfn"\n",
@@ -435,14 +472,19 @@ int cf_check amd_iommu_unmap_page(
     if ( pt_mfn )
     {
         /* Mark PTE as 'page not present'. */
-        old = clear_iommu_pte_present(pt_mfn, dfn_x(dfn));
+        old = clear_iommu_pte_present(pt_mfn, dfn_x(dfn), level);
     }
 
     spin_unlock(&hd->arch.mapping_lock);
 
     if ( old.pr )
+    {
         *flush_flags |= IOMMU_FLUSHF_modified;
 
+        if ( order && old.next_level )
+            queue_free_pt(hd, _mfn(old.mfn), old.next_level);
+    }
+
     return 0;
 }
 
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c 
b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index fd91f1367f..02aea838ad 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -747,7 +747,7 @@ static void cf_check amd_dump_page_tables(struct domain *d)
 }
 
 static const struct iommu_ops __initconst_cf_clobber _iommu_ops = {
-    .page_sizes = PAGE_SIZE_4K,
+    .page_sizes = PAGE_SIZE_4K | PAGE_SIZE_2M | PAGE_SIZE_1G,
     .init = amd_iommu_domain_init,
     .hwdom_init = amd_iommu_hwdom_init,
     .quarantine_init = amd_iommu_quarantine_init,
diff --git a/xen/include/xen/page-defs.h b/xen/include/xen/page-defs.h
index 15a90779b5..540f8b0b64 100644
--- a/xen/include/xen/page-defs.h
+++ b/xen/include/xen/page-defs.h
@@ -21,4 +21,14 @@
 #define PAGE_MASK_64K               PAGE_MASK_GRAN(64K)
 #define PAGE_ALIGN_64K(addr)        PAGE_ALIGN_GRAN(64K, addr)
 
+#define PAGE_SHIFT_2M               21
+#define PAGE_SIZE_2M                PAGE_SIZE_GRAN(2M)
+#define PAGE_MASK_2M                PAGE_MASK_GRAN(2M)
+#define PAGE_ALIGN_2M(addr)         PAGE_ALIGN_GRAN(2M, addr)
+
+#define PAGE_SHIFT_1G               30
+#define PAGE_SIZE_1G                PAGE_SIZE_GRAN(1G)
+#define PAGE_MASK_1G                PAGE_MASK_GRAN(1G)
+#define PAGE_ALIGN_1G(addr)         PAGE_ALIGN_GRAN(1G, addr)
+
 #endif /* __XEN_PAGE_DEFS_H__ */
--
generated by git-patchbot for /home/xen/git/xen.git#staging



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.