[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen master] x86/iommu: switch hwdom IOMMU to use a rangeset



commit 0e1bd15a1d5d3576c77061ad84db5ff186ca45fc
Author:     Roger Pau Monne <roger.pau@xxxxxxxxxx>
AuthorDate: Wed Jan 24 18:29:52 2024 +0100
Commit:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
CommitDate: Mon Jan 29 17:25:15 2024 +0000

    x86/iommu: switch hwdom IOMMU to use a rangeset
    
    The current loop that iterates from 0 to the maximum RAM address in order to
    setup the IOMMU mappings is highly inefficient, and it will get worse as the
    amount of RAM increases.  It's also not accounting for any reserved regions
    past the last RAM address.
    
    Instead of iterating over memory addresses, iterate over the memory map 
regions
    and use a rangeset in order to keep track of which ranges need to be 
identity
    mapped in the hardware domain physical address space.
    
    On an AMD EPYC 7452 with 512GiB of RAM, the time to execute
    arch_iommu_hwdom_init() in nanoseconds is:
    
    x old
    + new
        N           Min           Max        Median           Avg        Stddev
    x   5 2.2364154e+10  2.338244e+10 2.2474685e+10 2.2622409e+10 4.2949869e+08
    +   5       1025012       1033036       1026188     1028276.2     3623.1194
    Difference at 95.0% confidence
            -2.26214e+10 +/- 4.42931e+08
            -99.9955% +/- 9.05152e-05%
            (Student's t, pooled s = 3.03701e+08)
    
    Execution time of arch_iommu_hwdom_init() goes down from ~22s to ~0.001s.
    
    Note there's a change for HVM domains (ie: PVH dom0) that get switched to
    create the p2m mappings using map_mmio_regions() instead of
    p2m_add_identity_entry(), so that ranges can be mapped with a single 
function
    call if possible.  Note that the interface of map_mmio_regions() doesn't
    allow creating read-only mappings, but so far there are no such mappings
    created for PVH dom0 in arch_iommu_hwdom_init().
    
    No change intended in the resulting mappings that a hardware domain gets.
    
    Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
    Reviewed-by: Paul Durrant <paul@xxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
---
 CHANGELOG.md                        |   1 +
 xen/drivers/passthrough/x86/iommu.c | 149 ++++++++----------------------------
 2 files changed, 34 insertions(+), 116 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ddb3ab8db4..1f55c9c72d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ The format is based on [Keep a 
Changelog](https://keepachangelog.com/en/1.0.0/)
    use "1" as the number of array elements.
  - On x86:
    - HVM PIRQs are disabled by default.
+   - Reduce IOMMU setup time for hardware domain.
 
 ### Added
  - On x86:
diff --git a/xen/drivers/passthrough/x86/iommu.c 
b/xen/drivers/passthrough/x86/iommu.c
index fc5215a9dc..c90755ff58 100644
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -300,76 +300,6 @@ void iommu_identity_map_teardown(struct domain *d)
     }
 }
 
-static unsigned int __hwdom_init hwdom_iommu_map(const struct domain *d,
-                                                 unsigned long pfn,
-                                                 unsigned long max_pfn)
-{
-    mfn_t mfn = _mfn(pfn);
-    unsigned int i, type, perms = IOMMUF_readable | IOMMUF_writable;
-
-    /*
-     * Set up 1:1 mapping for dom0. Default to include only conventional RAM
-     * areas and let RMRRs include needed reserved regions. When set, the
-     * inclusive mapping additionally maps in every pfn up to 4GB except those
-     * that fall in unusable ranges for PV Dom0.
-     */
-    if ( (pfn > max_pfn && !mfn_valid(mfn)) || xen_in_range(pfn) )
-        return 0;
-
-    switch ( type = page_get_ram_type(mfn) )
-    {
-    case RAM_TYPE_UNUSABLE:
-        return 0;
-
-    case RAM_TYPE_CONVENTIONAL:
-        if ( iommu_hwdom_strict )
-            return 0;
-        break;
-
-    default:
-        if ( type & RAM_TYPE_RESERVED )
-        {
-            if ( !iommu_hwdom_inclusive && !iommu_hwdom_reserved )
-                perms = 0;
-        }
-        else if ( is_hvm_domain(d) )
-            return 0;
-        else if ( !iommu_hwdom_inclusive || pfn > max_pfn )
-            perms = 0;
-    }
-
-    /* Check that it doesn't overlap with the Interrupt Address Range. */
-    if ( pfn >= 0xfee00 && pfn <= 0xfeeff )
-        return 0;
-    /* ... or the IO-APIC */
-    if ( has_vioapic(d) )
-    {
-        for ( i = 0; i < d->arch.hvm.nr_vioapics; i++ )
-            if ( pfn == PFN_DOWN(domain_vioapic(d, i)->base_address) )
-                return 0;
-    }
-    else if ( is_pv_domain(d) )
-    {
-        /*
-         * Be consistent with CPU mappings: Dom0 is permitted to establish r/o
-         * ones there (also for e.g. HPET in certain cases), so it should also
-         * have such established for IOMMUs.
-         */
-        if ( iomem_access_permitted(d, pfn, pfn) &&
-             rangeset_contains_singleton(mmio_ro_ranges, pfn) )
-            perms = IOMMUF_readable;
-    }
-    /*
-     * ... or the PCIe MCFG regions.
-     * TODO: runtime added MMCFG regions are not checked to make sure they
-     * don't overlap with already mapped regions, thus preventing trapping.
-     */
-    if ( has_vpci(d) && vpci_is_mmcfg_address(d, pfn_to_paddr(pfn)) )
-        return 0;
-
-    return perms;
-}
-
 static int __hwdom_init cf_check map_subtract(unsigned long s, unsigned long e,
                                               void *data)
 {
@@ -455,8 +385,7 @@ static int __hwdom_init cf_check identity_map(unsigned long 
s, unsigned long e,
 
 void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
 {
-    unsigned long i, top, max_pfn, start, count;
-    unsigned int start_perms = 0;
+    unsigned int i;
     struct rangeset *map;
     struct map_data map_data = { .d = d };
     int rc;
@@ -487,58 +416,46 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
     if ( !map )
         panic("IOMMU init: unable to allocate rangeset\n");
 
-    max_pfn = (GB(4) >> PAGE_SHIFT) - 1;
-    top = max(max_pdx, pfn_to_pdx(max_pfn) + 1);
+    if ( iommu_hwdom_inclusive )
+    {
+        /* Add the whole range below 4GB, UNUSABLE regions will be removed. */
+        rc = rangeset_add_range(map, 0, PFN_DOWN(GB(4)) - 1);
+        if ( rc )
+            panic("IOMMU inclusive mappings can't be added: %d\n", rc);
+    }
 
-    for ( i = 0, start = 0, count = 0; i < top; )
+    for ( i = 0; i < e820.nr_map; i++ )
     {
-        unsigned long pfn = pdx_to_pfn(i);
-        unsigned int perms = hwdom_iommu_map(d, pfn, max_pfn);
+        const struct e820entry entry = e820.map[i];
 
-        if ( !perms )
-            /* nothing */;
-        else if ( paging_mode_translate(d) )
+        switch ( entry.type )
         {
-            int rc;
-
-            rc = p2m_add_identity_entry(d, pfn,
-                                        perms & IOMMUF_writable ? p2m_access_rw
-                                                                : p2m_access_r,
-                                        0);
+        case E820_UNUSABLE:
+            /* Only relevant for inclusive mode, otherwise this is a no-op. */
+            rc = rangeset_remove_range(map, PFN_DOWN(entry.addr),
+                                       PFN_DOWN(entry.addr + entry.size - 1));
             if ( rc )
-                printk(XENLOG_WARNING
-                       "%pd: identity mapping of %lx failed: %d\n",
-                       d, pfn, rc);
-        }
-        else if ( pfn != start + count || perms != start_perms )
-        {
-            long rc;
+                panic("IOMMU failed to remove unusable memory: %d\n", rc);
+            continue;
 
-        commit:
-            while ( (rc = iommu_map(d, _dfn(start), _mfn(start), count,
-                                    start_perms | IOMMUF_preempt,
-                                    &map_data.flush_flags)) > 0 )
-            {
-                start += rc;
-                count -= rc;
-                process_pending_softirqs();
-            }
-            if ( rc )
-                printk(XENLOG_WARNING
-                       "%pd: IOMMU identity mapping of [%lx,%lx) failed: 
%ld\n",
-                       d, start, start + count, rc);
-            start = pfn;
-            count = 1;
-            start_perms = perms;
-        }
-        else
-            ++count;
+        case E820_RESERVED:
+            if ( !iommu_hwdom_inclusive && !iommu_hwdom_reserved )
+                continue;
+            break;
 
-        if ( !(++i & 0xfffff) )
-            process_pending_softirqs();
+        case E820_RAM:
+            if ( iommu_hwdom_strict )
+                continue;
+            break;
 
-        if ( i == top && count )
-            goto commit;
+        default:
+            continue;
+        }
+
+        rc = rangeset_add_range(map, PFN_DOWN(entry.addr),
+                                PFN_DOWN(entry.addr + entry.size - 1));
+        if ( rc )
+            panic("IOMMU failed to add identity range: %d\n", rc);
     }
 
     /* Remove any areas in-use by Xen. */
--
generated by git-patchbot for /home/xen/git/xen.git#master



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.