[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 4/5] VT-d: widen NUMA nodes to be allocated from



I think Andrew’s comment is correct. IO NUMA is different and IOMMU structures should be allocated based on proximity of IOMMU itself.

 

Thanks

Kevin

 

From: Andrew Cooper [mailto:andrew.cooper3@xxxxxxxxxx]
Sent: Friday, March 06, 2015 1:09 AM
To: Jan Beulich; xen-devel
Cc: Zhang, Yang Z; Dario Faggioli; Tian, Kevin
Subject: Re: [Xen-devel] [PATCH 4/5] VT-d: widen NUMA nodes to be allocated from

 

On 26/02/15 13:55, Jan Beulich wrote:

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>


I believe that this change is incorrect.


 
 
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -67,7 +67,8 @@ unsigned int get_cache_line_size(void);
 void cacheline_flush(char *);
 void flush_all_cache(void);
 
-u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages);
+u64 alloc_pgtable_maddr(struct acpi_drhd_unit *, unsigned int npages,
+                        struct domain *);
 void free_pgtable_maddr(u64 maddr);
 void *map_vtd_domain_page(u64 maddr);
 void unmap_vtd_domain_page(void *va);
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -739,7 +739,8 @@ int enable_intremap(struct iommu *iommu,
     if ( ir_ctrl->iremap_maddr == 0 )
     {
         drhd = iommu_to_drhd(iommu);
-        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR);
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR,
+                                                    NULL);
         if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -185,7 +185,8 @@ void iommu_flush_cache_page(void *addr, 
 }
 
 /* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
+u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned int npages,
+                        struct domain *d)
 {
     struct acpi_rhsa_unit *rhsa;
     struct page_info *pg, *cur_pg;
@@ -195,10 +196,10 @@ u64 alloc_pgtable_maddr(struct acpi_drhd
 
     rhsa = drhd_to_rhsa(drhd);
     if ( rhsa )
-        node =  pxm_to_node(rhsa->proximity_domain);
+        node = pxm_to_node(rhsa->proximity_domain);
 
-    pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
-                             (node == NUMA_NO_NODE) ? 0 : MEMF_node(node));
+    pg = alloc_domheap_pages(d, get_order_from_pages(npages),
+                             MEMF_node(node) | MEMF_no_owner);
     if ( !pg )
         return 0;
 
@@ -223,7 +224,7 @@ void free_pgtable_maddr(u64 maddr)
 }
 
 /* context entry handling */
-static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
+static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus, struct domain *d)
 {
     struct acpi_drhd_unit *drhd;
     struct root_entry *root, *root_entries;
@@ -235,7 +236,7 @@ static u64 bus_to_context_maddr(struct i
     if ( !root_present(*root) )
     {
         drhd = iommu_to_drhd(iommu);
-        maddr = alloc_pgtable_maddr(drhd, 1);
+        maddr = alloc_pgtable_maddr(drhd, 1, d);
         if ( maddr == 0 )
         {
             unmap_vtd_domain_page(root_entries);
@@ -271,7 +272,9 @@ static u64 addr_to_dma_page_maddr(struct
          */
         pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
         drhd = acpi_find_matched_drhd_unit(pdev);
-        if ( !alloc || ((hd->arch.pgd_maddr = alloc_pgtable_maddr(drhd, 1)) == 0) )
+        if ( !alloc ||
+             ((hd->arch.pgd_maddr = alloc_pgtable_maddr(drhd, 1,
+                                                        domain)) == 0) )


This allocation should be based on the proximity information of the iommu, not of the requesting domain.


 
             goto out;
     }
 
@@ -289,7 +292,7 @@ static u64 addr_to_dma_page_maddr(struct
 
             pdev = pci_get_pdev_by_domain(domain, -1, -1, -1);
             drhd = acpi_find_matched_drhd_unit(pdev);
-            pte_maddr = alloc_pgtable_maddr(drhd, 1);
+            pte_maddr = alloc_pgtable_maddr(drhd, 1, domain);


As should this.

I don't see a need to extend the prototype of alloc_pgtable_maddr().  The drhd parameter contains all relevant information concerning proximity.

~Andrew


 
             if ( !pte_maddr )
                 break;
 
@@ -1119,7 +1122,7 @@ int __init iommu_alloc(struct acpi_drhd_
     iommu->intel->drhd = drhd;
     drhd->iommu = iommu;
 
-    if ( !(iommu->root_maddr = alloc_pgtable_maddr(drhd, 1)) )
+    if ( !(iommu->root_maddr = alloc_pgtable_maddr(drhd, 1, NULL)) )
         return -ENOMEM;
 
     iommu->reg = ioremap(drhd->address, PAGE_SIZE);
@@ -1270,7 +1273,7 @@ int domain_context_mapping_one(
 
     ASSERT(spin_is_locked(&pcidevs_lock));
     spin_lock(&iommu->lock);
-    maddr = bus_to_context_maddr(iommu, bus);
+    maddr = bus_to_context_maddr(iommu, bus, domain);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
     context = &context_entries[devfn];
 
@@ -1495,7 +1498,7 @@ int domain_context_unmap_one(
     ASSERT(spin_is_locked(&pcidevs_lock));
     spin_lock(&iommu->lock);
 
-    maddr = bus_to_context_maddr(iommu, bus);
+    maddr = bus_to_context_maddr(iommu, bus, domain);
     context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
     context = &context_entries[devfn];
 
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -374,7 +374,8 @@ int enable_qinval(struct iommu *iommu)
     if ( qi_ctrl->qinval_maddr == 0 )
     {
         drhd = iommu_to_drhd(iommu);
-        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, QINVAL_ARCH_PAGE_NR);
+        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, QINVAL_ARCH_PAGE_NR,
+                                                    NULL);
         if ( qi_ctrl->qinval_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
 
 




_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.