[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen staging] VT-d: avoid PCI device lookup



commit 4067bbfa3bc02d9b80a25196681629dbd4456123
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Thu Sep 5 09:58:17 2019 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Sep 5 09:58:17 2019 +0200

    VT-d: avoid PCI device lookup
    
    The two uses of pci_get_pdev_by_domain() lack proper locking, but are
    also only used to get hold of a NUMA node ID. Calculate and store the
    node ID earlier on and remove the lookups (in lieu of fixing the
    locking).
    
    While doing this it became apparent that iommu_alloc()'s use of
    alloc_pgtable_maddr() would occur before RHSAs would have been parsed:
    iommu_alloc() gets called from the DRHD parsing routine, which - on
    spec conforming platforms - happens strictly before RHSA parsing. Defer
    the allocation until after all ACPI table parsing has finished,
    establishing the node ID there first.
    
    Suggested-by: Kevin Tian <kevin.tian@xxxxxxxxx>
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>
---
 xen/drivers/passthrough/iommu.c        |  4 +++
 xen/drivers/passthrough/vtd/dmar.c     | 16 ++++++++++++
 xen/drivers/passthrough/vtd/extern.h   |  2 +-
 xen/drivers/passthrough/vtd/intremap.c |  4 +--
 xen/drivers/passthrough/vtd/iommu.c    | 46 ++++++++++++++--------------------
 xen/drivers/passthrough/vtd/iommu.h    |  1 +
 xen/drivers/passthrough/vtd/qinval.c   |  4 +--
 xen/include/xen/iommu.h                |  5 ++++
 8 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 37eb0f7d01..b82f778479 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -151,6 +151,10 @@ int iommu_domain_init(struct domain *d)
     struct domain_iommu *hd = dom_iommu(d);
     int ret = 0;
 
+#ifdef CONFIG_NUMA
+    hd->node = NUMA_NO_NODE;
+#endif
+
     ret = arch_iommu_domain_init(d);
     if ( ret )
         return ret;
diff --git a/xen/drivers/passthrough/vtd/dmar.c 
b/xen/drivers/passthrough/vtd/dmar.c
index 608be4883a..8398cc2763 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -965,6 +965,7 @@ int __init acpi_dmar_init(void)
 {
     acpi_physical_address dmar_addr;
     acpi_native_uint dmar_len;
+    const struct acpi_drhd_unit *drhd;
     int ret;
 
     if ( ACPI_SUCCESS(acpi_get_table_phys(ACPI_SIG_DMAR, 0,
@@ -978,6 +979,21 @@ int __init acpi_dmar_init(void)
 
     ret = parse_dmar_table(acpi_parse_dmar);
 
+    for_each_drhd_unit ( drhd )
+    {
+        const struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd);
+        struct iommu *iommu = drhd->iommu;
+
+        if ( ret )
+            break;
+
+        if ( rhsa )
+            iommu->node = pxm_to_node(rhsa->proximity_domain);
+
+        if ( !(iommu->root_maddr = alloc_pgtable_maddr(1, iommu->node)) )
+            ret = -ENOMEM;
+    }
+
     if ( !ret )
     {
         iommu_init_ops = &intel_iommu_init_ops;
diff --git a/xen/drivers/passthrough/vtd/extern.h 
b/xen/drivers/passthrough/vtd/extern.h
index 92519f6c8c..5f50971ed2 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -73,7 +73,7 @@ unsigned int get_cache_line_size(void);
 void cacheline_flush(char *);
 void flush_all_cache(void);
 
-u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages);
+uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
 void free_pgtable_maddr(u64 maddr);
 void *map_vtd_domain_page(u64 maddr);
 void unmap_vtd_domain_page(void *va);
diff --git a/xen/drivers/passthrough/vtd/intremap.c 
b/xen/drivers/passthrough/vtd/intremap.c
index e8d6091eb4..0a05ec654f 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -795,8 +795,8 @@ int enable_intremap(struct iommu *iommu, int eim)
 
     if ( ir_ctrl->iremap_maddr == 0 )
     {
-        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(iommu->intel->drhd,
-                                                    IREMAP_ARCH_PAGE_NR);
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(IREMAP_ARCH_PAGE_NR,
+                                                    iommu->node);
         if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
diff --git a/xen/drivers/passthrough/vtd/iommu.c 
b/xen/drivers/passthrough/vtd/iommu.c
index efbf2ee3ec..19fcd4fca5 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -184,18 +184,12 @@ void iommu_flush_cache_page(void *addr, unsigned long 
npages)
 }
 
 /* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
+uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
 {
-    struct acpi_rhsa_unit *rhsa;
     struct page_info *pg, *cur_pg;
     u64 *vaddr;
-    nodeid_t node = NUMA_NO_NODE;
     unsigned int i;
 
-    rhsa = drhd_to_rhsa(drhd);
-    if ( rhsa )
-        node =  pxm_to_node(rhsa->proximity_domain);
-
     pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
                              (node == NUMA_NO_NODE) ? 0 : MEMF_node(node));
     if ( !pg )
@@ -232,7 +226,7 @@ static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
     root = &root_entries[bus];
     if ( !root_present(*root) )
     {
-        maddr = alloc_pgtable_maddr(iommu->intel->drhd, 1);
+        maddr = alloc_pgtable_maddr(1, iommu->node);
         if ( maddr == 0 )
         {
             unmap_vtd_domain_page(root_entries);
@@ -249,8 +243,6 @@ static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
 
 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
 {
-    struct acpi_drhd_unit *drhd;
-    struct pci_dev *pdev;
     struct domain_iommu *hd = dom_iommu(domain);
     int addr_width = agaw_to_width(hd->arch.agaw);
     struct dma_pte *parent, *pte = NULL;
@@ -260,17 +252,10 @@ static u64 addr_to_dma_page_maddr(struct domain *domain, 
u64 addr, int alloc)
 
     addr &= (((u64)1) << addr_width) - 1;
     ASSERT(spin_is_locked(&hd->arch.mapping_lock));
-    if ( hd->arch.pgd_maddr == 0 )
-    {
-        /*
-         * just get any passthrough device in the domainr - assume user
-         * assigns only devices from same node to a given guest.
-         */
-        pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
-        drhd = acpi_find_matched_drhd_unit(pdev);
-        if ( !alloc || ((hd->arch.pgd_maddr = alloc_pgtable_maddr(drhd, 1)) == 
0) )
-            goto out;
-    }
+    if ( !hd->arch.pgd_maddr &&
+         (!alloc ||
+          ((hd->arch.pgd_maddr = alloc_pgtable_maddr(1, hd->node)) == 0)) )
+        goto out;
 
     parent = (struct dma_pte *)map_vtd_domain_page(hd->arch.pgd_maddr);
     while ( level > 1 )
@@ -284,9 +269,7 @@ static u64 addr_to_dma_page_maddr(struct domain *domain, 
u64 addr, int alloc)
             if ( !alloc )
                 break;
 
-            pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
-            drhd = acpi_find_matched_drhd_unit(pdev);
-            pte_maddr = alloc_pgtable_maddr(drhd, 1);
+            pte_maddr = alloc_pgtable_maddr(1, hd->node);
             if ( !pte_maddr )
                 break;
 
@@ -1181,6 +1164,7 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
         return -ENOMEM;
 
     iommu->msi.irq = -1; /* No irq assigned yet. */
+    iommu->node = NUMA_NO_NODE;
     INIT_LIST_HEAD(&iommu->ats_devices);
 
     iommu->intel = alloc_intel_iommu();
@@ -1192,9 +1176,6 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
     iommu->intel->drhd = drhd;
     drhd->iommu = iommu;
 
-    if ( !(iommu->root_maddr = alloc_pgtable_maddr(drhd, 1)) )
-        return -ENOMEM;
-
     iommu->reg = ioremap(drhd->address, PAGE_SIZE);
     if ( !iommu->reg )
         return -ENOMEM;
@@ -1488,6 +1469,17 @@ static int domain_context_mapping(struct domain *domain, 
u8 devfn,
     if ( !drhd )
         return -ENODEV;
 
+    /*
+     * Generally we assume only devices from one node to get assigned to a
+     * given guest.  But even if not, by replacing the prior value here we
+     * guarantee that at least some basic allocations for the device being
+     * added will get done against its node.  Any further allocations for
+     * this or other devices may be penalized then, but some would also be
+     * if we left other than NUMA_NO_NODE untouched here.
+     */
+    if ( drhd->iommu->node != NUMA_NO_NODE )
+        dom_iommu(domain)->node = drhd->iommu->node;
+
     ASSERT(pcidevs_locked());
 
     switch ( pdev->type )
diff --git a/xen/drivers/passthrough/vtd/iommu.h 
b/xen/drivers/passthrough/vtd/iommu.h
index c9290a3996..c2490784d6 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -542,6 +542,7 @@ struct iommu {
     spinlock_t lock; /* protect context, domain ids */
     spinlock_t register_lock; /* protect iommu register handling */
     u64 root_maddr; /* root entry machine address */
+    nodeid_t node;
     struct msi_desc msi;
     struct intel_iommu *intel;
     struct list_head ats_devices;
diff --git a/xen/drivers/passthrough/vtd/qinval.c 
b/xen/drivers/passthrough/vtd/qinval.c
index ca1a5acd43..980f20b8b9 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -415,8 +415,8 @@ int enable_qinval(struct iommu *iommu)
 
     if ( qi_ctrl->qinval_maddr == 0 )
     {
-        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(iommu->intel->drhd,
-                                                    QINVAL_ARCH_PAGE_NR);
+        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(QINVAL_ARCH_PAGE_NR,
+                                                    iommu->node);
         if ( qi_ctrl->qinval_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 314f28f323..ab258b848b 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -266,6 +266,11 @@ struct domain_iommu {
     struct list_head dt_devices;
 #endif
 
+#ifdef CONFIG_NUMA
+    /* NUMA node to do IOMMU related allocations against. */
+    nodeid_t node;
+#endif
+
     /* Features supported by the IOMMU */
     DECLARE_BITMAP(features, IOMMU_FEAT_count);
 
--
generated by git-patchbot for /home/xen/git/xen.git#staging

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/xen-changelog

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.