[Xen-changelog] [xen staging] VT-d: avoid PCI device lookup
commit 4067bbfa3bc02d9b80a25196681629dbd4456123
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Thu Sep  5 09:58:17 2019 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Sep  5 09:58:17 2019 +0200

    VT-d: avoid PCI device lookup

    The two uses of pci_get_pdev_by_domain() lack proper locking, but are
    also only used to get hold of a NUMA node ID. Calculate and store the
    node ID earlier on and remove the lookups (in lieu of fixing the
    locking).

    While doing this it became apparent that iommu_alloc()'s use of
    alloc_pgtable_maddr() would occur before RHSAs would have been parsed:
    iommu_alloc() gets called from the DRHD parsing routine, which - on
    spec conforming platforms - happens strictly before RHSA parsing.

    Defer the allocation until after all ACPI table parsing has finished,
    establishing the node ID there first.

    Suggested-by: Kevin Tian <kevin.tian@xxxxxxxxx>
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>
---
 xen/drivers/passthrough/iommu.c        |  4 +++
 xen/drivers/passthrough/vtd/dmar.c     | 16 ++++++++++++
 xen/drivers/passthrough/vtd/extern.h   |  2 +-
 xen/drivers/passthrough/vtd/intremap.c |  4 +--
 xen/drivers/passthrough/vtd/iommu.c    | 46 ++++++++++++++--------------------
 xen/drivers/passthrough/vtd/iommu.h    |  1 +
 xen/drivers/passthrough/vtd/qinval.c   |  4 +--
 xen/include/xen/iommu.h                |  5 ++++
 8 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 37eb0f7d01..b82f778479 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -151,6 +151,10 @@ int iommu_domain_init(struct domain *d)
     struct domain_iommu *hd = dom_iommu(d);
     int ret = 0;
 
+#ifdef CONFIG_NUMA
+    hd->node = NUMA_NO_NODE;
+#endif
+
     ret = arch_iommu_domain_init(d);
     if ( ret )
         return ret;
diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
index 608be4883a..8398cc2763 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -965,6 +965,7 @@ int __init acpi_dmar_init(void)
 {
     acpi_physical_address dmar_addr;
     acpi_native_uint dmar_len;
+    const struct acpi_drhd_unit *drhd;
     int ret;
 
     if ( ACPI_SUCCESS(acpi_get_table_phys(ACPI_SIG_DMAR, 0,
@@ -978,6 +979,21 @@ int __init acpi_dmar_init(void)
 
     ret = parse_dmar_table(acpi_parse_dmar);
 
+    for_each_drhd_unit ( drhd )
+    {
+        const struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd);
+        struct iommu *iommu = drhd->iommu;
+
+        if ( ret )
+            break;
+
+        if ( rhsa )
+            iommu->node = pxm_to_node(rhsa->proximity_domain);
+
+        if ( !(iommu->root_maddr = alloc_pgtable_maddr(1, iommu->node)) )
+            ret = -ENOMEM;
+    }
+
     if ( !ret )
     {
         iommu_init_ops = &intel_iommu_init_ops;
diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
index 92519f6c8c..5f50971ed2 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -73,7 +73,7 @@ unsigned int get_cache_line_size(void);
 void cacheline_flush(char *);
 void flush_all_cache(void);
 
-u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages);
+uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
 void free_pgtable_maddr(u64 maddr);
 void *map_vtd_domain_page(u64 maddr);
 void unmap_vtd_domain_page(void *va);
diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c
index e8d6091eb4..0a05ec654f 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -795,8 +795,8 @@ int enable_intremap(struct iommu *iommu, int eim)
 
     if ( ir_ctrl->iremap_maddr == 0 )
     {
-        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(iommu->intel->drhd,
-                                                    IREMAP_ARCH_PAGE_NR);
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(IREMAP_ARCH_PAGE_NR,
+                                                    iommu->node);
         if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index efbf2ee3ec..19fcd4fca5 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -184,18 +184,12 @@ void iommu_flush_cache_page(void *addr, unsigned long npages)
 }
 
 /* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
+uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
 {
-    struct acpi_rhsa_unit *rhsa;
     struct page_info *pg, *cur_pg;
     u64 *vaddr;
-    nodeid_t node = NUMA_NO_NODE;
     unsigned int i;
 
-    rhsa = drhd_to_rhsa(drhd);
-    if ( rhsa )
-        node = pxm_to_node(rhsa->proximity_domain);
-
     pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
                              (node == NUMA_NO_NODE) ? 0 : MEMF_node(node));
     if ( !pg )
@@ -232,7 +226,7 @@ static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
     root = &root_entries[bus];
     if ( !root_present(*root) )
     {
-        maddr = alloc_pgtable_maddr(iommu->intel->drhd, 1);
+        maddr = alloc_pgtable_maddr(1, iommu->node);
         if ( maddr == 0 )
         {
             unmap_vtd_domain_page(root_entries);
@@ -249,8 +243,6 @@ static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
 
 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
 {
-    struct acpi_drhd_unit *drhd;
-    struct pci_dev *pdev;
     struct domain_iommu *hd = dom_iommu(domain);
     int addr_width = agaw_to_width(hd->arch.agaw);
     struct dma_pte *parent, *pte = NULL;
@@ -260,17 +252,10 @@ static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
     addr &= (((u64)1) << addr_width) - 1;
     ASSERT(spin_is_locked(&hd->arch.mapping_lock));
 
-    if ( hd->arch.pgd_maddr == 0 )
-    {
-        /*
-         * just get any passthrough device in the domainr - assume user
-         * assigns only devices from same node to a given guest.
-         */
-        pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
-        drhd = acpi_find_matched_drhd_unit(pdev);
-        if ( !alloc || ((hd->arch.pgd_maddr = alloc_pgtable_maddr(drhd, 1)) == 0) )
-            goto out;
-    }
+    if ( !hd->arch.pgd_maddr &&
+         (!alloc ||
+          ((hd->arch.pgd_maddr = alloc_pgtable_maddr(1, hd->node)) == 0)) )
+        goto out;
 
     parent = (struct dma_pte *)map_vtd_domain_page(hd->arch.pgd_maddr);
     while ( level > 1 )
@@ -284,9 +269,7 @@ static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
             if ( !alloc )
                 break;
 
-            pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
-            drhd = acpi_find_matched_drhd_unit(pdev);
-            pte_maddr = alloc_pgtable_maddr(drhd, 1);
+            pte_maddr = alloc_pgtable_maddr(1, hd->node);
             if ( !pte_maddr )
                 break;
 
@@ -1181,6 +1164,7 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
         return -ENOMEM;
 
     iommu->msi.irq = -1; /* No irq assigned yet. */
+    iommu->node = NUMA_NO_NODE;
     INIT_LIST_HEAD(&iommu->ats_devices);
 
     iommu->intel = alloc_intel_iommu();
@@ -1192,9 +1176,6 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
     iommu->intel->drhd = drhd;
     drhd->iommu = iommu;
 
-    if ( !(iommu->root_maddr = alloc_pgtable_maddr(drhd, 1)) )
-        return -ENOMEM;
-
     iommu->reg = ioremap(drhd->address, PAGE_SIZE);
     if ( !iommu->reg )
         return -ENOMEM;
@@ -1488,6 +1469,17 @@ static int domain_context_mapping(struct domain *domain, u8 devfn,
     if ( !drhd )
         return -ENODEV;
 
+    /*
+     * Generally we assume only devices from one node to get assigned to a
+     * given guest. But even if not, by replacing the prior value here we
+     * guarantee that at least some basic allocations for the device being
+     * added will get done against its node. Any further allocations for
+     * this or other devices may be penalized then, but some would also be
+     * if we left other than NUMA_NO_NODE untouched here.
+     */
+    if ( drhd->iommu->node != NUMA_NO_NODE )
+        dom_iommu(domain)->node = drhd->iommu->node;
+
     ASSERT(pcidevs_locked());
 
     switch ( pdev->type )
diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
index c9290a3996..c2490784d6 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -542,6 +542,7 @@ struct iommu {
     spinlock_t lock; /* protect context, domain ids */
     spinlock_t register_lock; /* protect iommu register handling */
     u64 root_maddr; /* root entry machine address */
+    nodeid_t node;
     struct msi_desc msi;
     struct intel_iommu *intel;
     struct list_head ats_devices;
diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c
index ca1a5acd43..980f20b8b9 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -415,8 +415,8 @@ int enable_qinval(struct iommu *iommu)
 
     if ( qi_ctrl->qinval_maddr == 0 )
     {
-        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(iommu->intel->drhd,
-                                                    QINVAL_ARCH_PAGE_NR);
+        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(QINVAL_ARCH_PAGE_NR,
+                                                    iommu->node);
         if ( qi_ctrl->qinval_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 314f28f323..ab258b848b 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -266,6 +266,11 @@ struct domain_iommu {
     struct list_head dt_devices;
 #endif
 
+#ifdef CONFIG_NUMA
+    /* NUMA node to do IOMMU related allocations against. */
+    nodeid_t node;
+#endif
+
     /* Features supported by the IOMMU */
     DECLARE_BITMAP(features, IOMMU_FEAT_count);
 
--
generated by git-patchbot for /home/xen/git/xen.git#staging
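
The change is easiest to see as a pattern: rather than performing a
(locking-sensitive) PCI device lookup each time a page table is allocated,
the NUMA node is resolved once, cached in the iommu / domain_iommu
structures, and handed to alloc_pgtable_maddr() from then on. Below is a
minimal, self-contained C sketch of that pattern; it compiles standalone,
and every name in it (fake_pxm_to_node(), alloc_table_page(), the local
struct domain_iommu) is a stand-in for illustration, not the real Xen
interface.

    /*
     * Minimal sketch of "resolve the node once, cache it, reuse it".
     * Stand-in names throughout; not the Xen implementation.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    typedef uint8_t nodeid_t;
    #define NUMA_NO_NODE ((nodeid_t)0xff)

    struct domain_iommu {
        nodeid_t node;       /* NUMA node for IOMMU-related allocations */
        uint64_t pgd_maddr;  /* top-level page table (0 = not allocated) */
    };

    /* Stand-in for pxm_to_node(): ACPI proximity domain -> NUMA node. */
    static nodeid_t fake_pxm_to_node(unsigned int pxm)
    {
        return (nodeid_t)(pxm & 1);  /* toy mapping for a two-node box */
    }

    /* Stand-in allocator honouring a node hint (cf. MEMF_node()). */
    static uint64_t alloc_table_page(nodeid_t node)
    {
        void *p = malloc(4096);

        if ( p )
            printf("table page allocated on %s\n",
                   node == NUMA_NO_NODE ? "any node" : "the cached node");
        return (uint64_t)(uintptr_t)p;
    }

    int main(void)
    {
        struct domain_iommu hd = { .node = NUMA_NO_NODE };

        /* Init/assign time: resolve the node once - no device lookup. */
        hd.node = fake_pxm_to_node(1);

        /* Map time: every later allocation just uses the cached node. */
        if ( !hd.pgd_maddr )
            hd.pgd_maddr = alloc_table_page(hd.node);

        free((void *)(uintptr_t)hd.pgd_maddr);
        return hd.pgd_maddr ? EXIT_SUCCESS : EXIT_FAILURE;
    }

The cached value starts out as NUMA_NO_NODE, which the allocator treats as
"no placement preference", so the sketch degrades gracefully on platforms
that never supply affinity information, matching the patch's behaviour.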
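
The second half of the change is an ordering fix: iommu_alloc() runs from
DRHD parsing, which on spec-conforming platforms happens strictly before
the RHSA entries (the carriers of the proximity domain) are parsed, so
allocating the root table there would always happen with the node still
unknown. The sketch below shows the resulting two-pass structure, again
with stand-in names and a toy table walk rather than the real
acpi_dmar_init() logic.

    /*
     * Sketch of the deferred-allocation ordering: units are discovered
     * during the table walk, affinity arrives later in the same walk,
     * and per-unit allocations move to a second pass after parsing.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    typedef uint8_t nodeid_t;
    #define NUMA_NO_NODE ((nodeid_t)0xff)
    #define NR_UNITS 2u

    struct unit {
        nodeid_t node;  /* set by the RHSA pass, if an RHSA exists */
        void *root;     /* stands in for root_maddr */
    };

    static struct unit units[NR_UNITS];

    /* Pass 1a: DRHD entries discover the units; nothing allocated yet. */
    static void parse_drhd(void)
    {
        for ( unsigned int i = 0; i < NR_UNITS; i++ )
            units[i] = (struct unit){ .node = NUMA_NO_NODE };
    }

    /* Pass 1b: RHSA entries attach affinity to already-seen units. */
    static void parse_rhsa(void)
    {
        units[0].node = 0;  /* unit 1 deliberately keeps NUMA_NO_NODE */
    }

    int main(void)
    {
        int ret = 0;

        parse_drhd();
        parse_rhsa();

        /* Pass 2: affinity is settled, so allocate per-unit tables now,
         * mirroring the loop the patch adds to acpi_dmar_init(). */
        for ( unsigned int i = 0; i < NR_UNITS && !ret; i++ )
        {
            units[i].root = malloc(4096);
            if ( !units[i].root )
                ret = -1;  /* stands in for -ENOMEM */
            else
                printf("unit %u: root table, node %u\n", i,
                       (unsigned int)units[i].node);
        }
        return ret ? EXIT_FAILURE : EXIT_SUCCESS;
    }

A failed allocation makes the second pass bail out early via ret, just as
the added for_each_drhd_unit() loop breaks once ret becomes non-zero.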