[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH 3/3] x86/iommu: use a rangeset for hwdom setup
On Fri, Nov 17, 2023 at 10:47:49AM +0100, Roger Pau Monne wrote: > The current loop that iterates from 0 to the maximum RAM address in order to > setup the IOMMU mappings is highly inefficient, and it will get worse as the > amount of RAM increases. It's also not accounting for any reserved regions > past the last RAM address. > > Instead of iterating over memory addresses, iterate over the memory map > regions > and use a rangeset in order to keep track of which ranges need to be identity > mapped in the hardware domain physical address space. > > On an AMD EPYC 7452 with 512GiB of RAM, the time to execute > arch_iommu_hwdom_init() in nanoseconds is: > > x old > + new > N Min Max Median Avg Stddev > x 5 2.2364154e+10 2.338244e+10 2.2474685e+10 2.2622409e+10 4.2949869e+08 > + 5 1025012 1033036 1026188 1028276.2 3623.1194 > Difference at 95.0% confidence > -2.26214e+10 +/- 4.42931e+08 > -99.9955% +/- 9.05152e-05% > (Student's t, pooled s = 3.03701e+08) > > Execution time of arch_iommu_hwdom_init() goes down from ~22s to ~0.001s. > > Note there's a change for HVM domains (ie: PVH dom0) that get switched to > create the p2m mappings using map_mmio_regions() instead of > p2m_add_identity_entry(), so that ranges can be mapped with a single function > call if possible. Note that the interface of map_mmio_regions() doesn't > allow creating read-only mappings, but so far there are no such mappings > created for PVH dom0 in arch_iommu_hwdom_init(). > > No change intended in the resulting mappings that a hardware domain gets. > > Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx> > --- > xen/arch/x86/hvm/io.c | 15 +- > xen/arch/x86/include/asm/hvm/io.h | 4 +- > xen/drivers/passthrough/x86/iommu.c | 355 +++++++++++++++++----------- > 3 files changed, 231 insertions(+), 143 deletions(-) > > diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c > index d75af83ad01f..7c4b7317b13a 100644 > --- a/xen/arch/x86/hvm/io.c > +++ b/xen/arch/x86/hvm/io.c > @@ -364,9 +364,20 @@ static const struct hvm_mmcfg *vpci_mmcfg_find(const > struct domain *d, > return NULL; > } > > -bool vpci_is_mmcfg_address(const struct domain *d, paddr_t addr) > +int vpci_subtract_mmcfg(const struct domain *d, struct rangeset *r) > { > - return vpci_mmcfg_find(d, addr); > + const struct hvm_mmcfg *mmcfg; > + > + list_for_each_entry ( mmcfg, &d->arch.hvm.mmcfg_regions, next ) > + { > + int rc = rangeset_remove_range(r, PFN_DOWN(mmcfg->addr), > + PFN_DOWN(mmcfg->addr + mmcfg->size - > 1)); > + > + if ( rc ) > + return rc; > + } > + > + return 0; > } > > static unsigned int vpci_mmcfg_decode_addr(const struct hvm_mmcfg *mmcfg, > diff --git a/xen/arch/x86/include/asm/hvm/io.h > b/xen/arch/x86/include/asm/hvm/io.h > index e5225e75ef26..c9d058fd5695 100644 > --- a/xen/arch/x86/include/asm/hvm/io.h > +++ b/xen/arch/x86/include/asm/hvm/io.h > @@ -153,8 +153,8 @@ int register_vpci_mmcfg_handler(struct domain *d, paddr_t > addr, > /* Destroy tracked MMCFG areas. */ > void destroy_vpci_mmcfg(struct domain *d); > > -/* Check if an address is between a MMCFG region for a domain. */ > -bool vpci_is_mmcfg_address(const struct domain *d, paddr_t addr); > +/* Remove MMCFG regions from a given rangeset. */ > +int vpci_subtract_mmcfg(const struct domain *d, struct rangeset *r); > > #endif /* __ASM_X86_HVM_IO_H__ */ > > diff --git a/xen/drivers/passthrough/x86/iommu.c > b/xen/drivers/passthrough/x86/iommu.c > index d70cee9fea77..be2c909f61d8 100644 > --- a/xen/drivers/passthrough/x86/iommu.c > +++ b/xen/drivers/passthrough/x86/iommu.c > @@ -301,129 +301,133 @@ void iommu_identity_map_teardown(struct domain *d) > } > } > > -static int __hwdom_init xen_in_range(unsigned long mfn) > +static int __hwdom_init remove_xen_ranges(struct rangeset *r) > { > paddr_t start, end; > - int i; > - > - enum { region_s3, region_ro, region_rw, region_bss, nr_regions }; > - static struct { > - paddr_t s, e; > - } xen_regions[nr_regions] __hwdom_initdata; > + int rc; > > - /* initialize first time */ > - if ( !xen_regions[0].s ) > - { > - /* S3 resume code (and other real mode trampoline code) */ > - xen_regions[region_s3].s = bootsym_phys(trampoline_start); > - xen_regions[region_s3].e = bootsym_phys(trampoline_end); > + /* S3 resume code (and other real mode trampoline code) */ > + rc = rangeset_remove_range(r, PFN_DOWN(bootsym_phys(trampoline_start)), > + PFN_DOWN(bootsym_phys(trampoline_end))); > + if ( rc ) > + return rc; > > - /* > - * This needs to remain in sync with the uses of the same symbols in > - * - __start_xen() > - * - is_xen_fixed_mfn() > - * - tboot_shutdown() > - */ > + /* > + * This needs to remain in sync with the uses of the same symbols in > + * - __start_xen() > + * - is_xen_fixed_mfn() > + * - tboot_shutdown() > + */ > + /* hypervisor .text + .rodata */ > + rc = rangeset_remove_range(r, PFN_DOWN(__pa(&_stext)), > + PFN_DOWN(__pa(&__2M_rodata_end))); > + if ( rc ) > + return rc; > > - /* hypervisor .text + .rodata */ > - xen_regions[region_ro].s = __pa(&_stext); > - xen_regions[region_ro].e = __pa(&__2M_rodata_end); > - /* hypervisor .data + .bss */ > - xen_regions[region_rw].s = __pa(&__2M_rwdata_start); > - xen_regions[region_rw].e = __pa(&__2M_rwdata_end); > - if ( efi_boot_mem_unused(&start, &end) ) > - { > - ASSERT(__pa(start) >= xen_regions[region_rw].s); > - ASSERT(__pa(end) <= xen_regions[region_rw].e); > - xen_regions[region_rw].e = __pa(start); > - xen_regions[region_bss].s = __pa(end); > - xen_regions[region_bss].e = __pa(&__2M_rwdata_end); > - } > + /* hypervisor .data + .bss */ > + if ( efi_boot_mem_unused(&start, &end) ) > + { > + ASSERT(__pa(start) >= __pa(&__2M_rwdata_start)); > + rc = rangeset_remove_range(r, PFN_DOWN(__pa(&__2M_rwdata_start)), > + PFN_DOWN(__pa(start))); > + if ( rc ) > + return rc; > + ASSERT(__pa(end) <= __pa(&__2M_rwdata_end)); > + rc = rangeset_remove_range(r, PFN_DOWN(__pa(end)), > + PFN_DOWN(__pa(&__2M_rwdata_end))); > + if ( rc ) > + return rc; > + } > + else > + { > + rc = rangeset_remove_range(r, PFN_DOWN(__pa(&__2M_rwdata_start)), > + PFN_DOWN(__pa(&__2M_rwdata_end))); > + if ( rc ) > + return rc; > } > - > - start = (paddr_t)mfn << PAGE_SHIFT; > - end = start + PAGE_SIZE; > - for ( i = 0; i < nr_regions; i++ ) > - if ( (start < xen_regions[i].e) && (end > xen_regions[i].s) ) > - return 1; > > return 0; > } > > -static unsigned int __hwdom_init hwdom_iommu_map(const struct domain *d, > - unsigned long pfn, > - unsigned long max_pfn) > +static int __hwdom_init map_subtract(unsigned long s, unsigned long e, Bah, this (and others below) are missing cf_check attribute. Will fix in v2. Roger.
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |