
[PATCH v2 15/18] IOMMU/x86: prefill newly allocated page tables


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Fri, 24 Sep 2021 11:54:58 +0200
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Paul Durrant <paul@xxxxxxx>, Kevin Tian <kevin.tian@xxxxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Fri, 24 Sep 2021 09:55:06 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

Page tables are used for two purposes after allocation: They either start
out all empty, or they get filled to replace a superpage. Subsequently, to
allow replacing all-empty or fully contiguous page tables, contiguous
sub-regions will be recorded within individual page tables. Install the
initial set of markers immediately after allocation. Make sure to retain
these markers when further populating a page table in preparation for it
to replace a superpage.

The markers are simply 4-bit fields holding the order value of
contiguous entries. To demonstrate this, if a page table had just 16
entries, this would be the initial (fully contiguous) set of markers:

index  0 1 2 3 4 5 6 7 8 9 A B C D E F
marker 4 0 1 0 2 0 1 0 3 0 1 0 2 0 1 0

"Contiguous" here means not only present entries with successively
increasing MFNs, each one suitably aligned for its slot, but also a
respective number of all non-present entries.
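
For illustration only (this snippet is not part of the patch, and POSIX
ffs() stands in for Xen's find_first_set_bit()), the marker values follow
a simple rule: slot 0 holds the table's full order, while every other slot
holds the number of trailing zero bits of its index:

#include <stdio.h>
#include <strings.h>

int main(void)
{
    unsigned int i;

    /* 16-entry toy table from the example above; a real 512-entry table
     * would get order PAGE_SHIFT - 3 = 9 in slot 0 instead of 4. */
    for ( i = 0; i < 16; ++i )
        printf("index %X marker %d\n", i, i ? ffs(i) - 1 : 4);

    return 0;
}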

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
An alternative to the ASSERT()s added to set_iommu_ptes_present() would
be to make the function less general-purpose; it's used in a single
place only, after all (i.e. it might as well be folded into its only
caller).
---
v2: New.
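
As a toy model of the "retain these markers" aspect (not VT-d code: the
marker bit position, address, and flag values below are made up, and POSIX
ffs() again stands in for find_first_set_bit()), the following shows that
the |= based population in the addr_to_dma_page_maddr() hunk advances the
addresses as before while leaving the prefilled markers intact:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <strings.h>

#define CONTIG_SHIFT 52                 /* made-up marker bit position */
#define CONTIG_MASK  (0xfull << CONTIG_SHIFT)
#define NR           512                /* PTEs per page table */

int main(void)
{
    uint64_t split[NR];
    uint64_t base = 0x40000000ull | 3;  /* superpage address | R/W flags */
    uint64_t inc = 1ull << 12;          /* 4k steps when splitting 2M */
    unsigned int i;

    /* Prefill as iommu_alloc_pgtable() would: slot 0 gets the full order,
     * slot i gets the number of trailing zero bits of i. */
    split[0] = 9ull << CONTIG_SHIFT;    /* PAGE_SHIFT - 3 */
    for ( i = 1; i < NR; ++i )
        split[i] = (uint64_t)(ffs(i) - 1) << CONTIG_SHIFT;

    /* Populate to replace the superpage, masking the markers out of the
     * value being propagated, as the hunk does. */
    split[0] |= base & ~CONTIG_MASK;
    for ( i = 1; i < NR; ++i )
        split[i] |= (split[i - 1] & ~CONTIG_MASK) + inc;

    /* Both the markers and the address progression are as expected. */
    for ( i = 1; i < NR; ++i )
    {
        assert(split[i] >> CONTIG_SHIFT == (unsigned int)(ffs(i) - 1));
        assert((split[i] & ~CONTIG_MASK) ==
               (split[i - 1] & ~CONTIG_MASK) + inc);
    }
    printf("markers retained across superpage split\n");

    return 0;
}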

--- a/xen/drivers/passthrough/amd/iommu-defs.h
+++ b/xen/drivers/passthrough/amd/iommu-defs.h
@@ -445,6 +445,8 @@ union amd_iommu_x2apic_control {
 #define IOMMU_PAGE_TABLE_U32_PER_ENTRY (IOMMU_PAGE_TABLE_ENTRY_SIZE / 4)
 #define IOMMU_PAGE_TABLE_ALIGNMENT     4096
 
+#define IOMMU_PTE_CONTIG_MASK           0x1e /* The ign0 field below. */
+
 union amd_iommu_pte {
     uint64_t raw;
     struct {
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -116,7 +116,19 @@ static void set_iommu_ptes_present(unsig
 
     while ( nr_ptes-- )
     {
-        set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
+        ASSERT(!pde->next_level);
+        ASSERT(!pde->u);
+
+        if ( pde > table )
+            ASSERT(pde->ign0 == find_first_set_bit(pde - table));
+        else
+            ASSERT(pde->ign0 == PAGE_SHIFT - 3);
+
+        pde->iw = iw;
+        pde->ir = ir;
+        pde->fc = true; /* See set_iommu_pde_present(). */
+        pde->mfn = next_mfn;
+        pde->pr = true;
 
         ++pde;
         next_mfn += page_sz;
@@ -232,7 +244,7 @@ static int iommu_pde_from_dfn(struct dom
             mfn = next_table_mfn;
 
             /* allocate lower level page table */
-            table = iommu_alloc_pgtable(d);
+            table = iommu_alloc_pgtable(d, IOMMU_PTE_CONTIG_MASK);
             if ( table == NULL )
             {
                 AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
@@ -262,7 +274,7 @@ static int iommu_pde_from_dfn(struct dom
 
             if ( next_table_mfn == 0 )
             {
-                table = iommu_alloc_pgtable(d);
+                table = iommu_alloc_pgtable(d, IOMMU_PTE_CONTIG_MASK);
                 if ( table == NULL )
                 {
                     AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
@@ -648,7 +660,7 @@ int __init amd_iommu_quarantine_init(str
 
     spin_lock(&hd->arch.mapping_lock);
 
-    hd->arch.amd.root_table = iommu_alloc_pgtable(d);
+    hd->arch.amd.root_table = iommu_alloc_pgtable(d, 0);
     if ( !hd->arch.amd.root_table )
         goto out;
 
@@ -663,7 +675,7 @@ int __init amd_iommu_quarantine_init(str
          * page table pages, and the resulting allocations are always
          * zeroed.
          */
-        pg = iommu_alloc_pgtable(d);
+        pg = iommu_alloc_pgtable(d, 0);
         if ( !pg )
             break;
 
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -238,7 +238,7 @@ int amd_iommu_alloc_root(struct domain *
 
     if ( unlikely(!hd->arch.amd.root_table) )
     {
-        hd->arch.amd.root_table = iommu_alloc_pgtable(d);
+        hd->arch.amd.root_table = iommu_alloc_pgtable(d, 0);
         if ( !hd->arch.amd.root_table )
             return -ENOMEM;
     }
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -297,7 +297,7 @@ static uint64_t addr_to_dma_page_maddr(s
             goto out;
 
         pte_maddr = level;
-        if ( !(pg = iommu_alloc_pgtable(domain)) )
+        if ( !(pg = iommu_alloc_pgtable(domain, 0)) )
             goto out;
 
         hd->arch.vtd.pgd_maddr = page_to_maddr(pg);
@@ -339,7 +339,7 @@ static uint64_t addr_to_dma_page_maddr(s
             }
 
             pte_maddr = level - 1;
-            pg = iommu_alloc_pgtable(domain);
+            pg = iommu_alloc_pgtable(domain, DMA_PTE_CONTIG_MASK);
             if ( !pg )
                 break;
 
@@ -351,12 +351,13 @@ static uint64_t addr_to_dma_page_maddr(s
                 struct dma_pte *split = map_vtd_domain_page(pte_maddr);
                 unsigned long inc = 1UL << level_to_offset_bits(level - 1);
 
-                split[0].val = pte->val;
+                split[0].val |= pte->val & ~DMA_PTE_CONTIG_MASK;
                 if ( inc == PAGE_SIZE )
                     split[0].val &= ~DMA_PTE_SP;
 
                 for ( offset = 1; offset < PTE_NUM; ++offset )
-                    split[offset].val = split[offset - 1].val + inc;
+                    split[offset].val |=
+                        (split[offset - 1].val & ~DMA_PTE_CONTIG_MASK) + inc;
 
                 iommu_sync_cache(split, PAGE_SIZE);
                 unmap_vtd_domain_page(split);
@@ -1943,7 +1944,7 @@ static int __must_check intel_iommu_map_
     if ( iommu_snoop )
         dma_set_pte_snp(new);
 
-    if ( old.val == new.val )
+    if ( !((old.val ^ new.val) & ~DMA_PTE_CONTIG_MASK) )
     {
         spin_unlock(&hd->arch.mapping_lock);
         unmap_vtd_domain_page(page);
@@ -2798,7 +2799,7 @@ static int __init intel_iommu_quarantine
         goto out;
     }
 
-    pg = iommu_alloc_pgtable(d);
+    pg = iommu_alloc_pgtable(d, 0);
 
     rc = -ENOMEM;
     if ( !pg )
@@ -2817,7 +2818,7 @@ static int __init intel_iommu_quarantine
          * page table pages, and the resulting allocations are always
          * zeroed.
          */
-        pg = iommu_alloc_pgtable(d);
+        pg = iommu_alloc_pgtable(d, 0);
 
         if ( !pg )
             goto out;
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -265,6 +265,7 @@ struct dma_pte {
 #define DMA_PTE_PROT (DMA_PTE_READ | DMA_PTE_WRITE)
 #define DMA_PTE_SP   (1 << 7)
 #define DMA_PTE_SNP  (1 << 11)
+#define DMA_PTE_CONTIG_MASK  (0xfull << PADDR_BITS)
 #define dma_clear_pte(p)    do {(p).val = 0;} while(0)
 #define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while(0)
 #define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while(0)
@@ -278,7 +279,7 @@ struct dma_pte {
 #define dma_pte_write(p) (dma_pte_prot(p) & DMA_PTE_WRITE)
 #define dma_pte_addr(p) ((p).val & PADDR_MASK & PAGE_MASK_4K)
 #define dma_set_pte_addr(p, addr) do {\
-            (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+            (p).val |= ((addr) & PADDR_MASK & PAGE_MASK_4K); } while (0)
 #define dma_pte_present(p) (((p).val & DMA_PTE_PROT) != 0)
 #define dma_pte_superpage(p) (((p).val & DMA_PTE_SP) != 0)
 
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -433,12 +433,12 @@ int iommu_free_pgtables(struct domain *d
     return 0;
 }
 
-struct page_info *iommu_alloc_pgtable(struct domain *d)
+struct page_info *iommu_alloc_pgtable(struct domain *d, uint64_t contig_mask)
 {
     struct domain_iommu *hd = dom_iommu(d);
     unsigned int memflags = 0;
     struct page_info *pg;
-    void *p;
+    uint64_t *p;
 
 #ifdef CONFIG_NUMA
     if ( hd->node != NUMA_NO_NODE )
@@ -450,7 +450,28 @@ struct page_info *iommu_alloc_pgtable(st
         return NULL;
 
     p = __map_domain_page(pg);
-    clear_page(p);
+
+    if ( contig_mask )
+    {
+        unsigned int i, shift = find_first_set_bit(contig_mask);
+
+        ASSERT(((PAGE_SHIFT - 3) & (contig_mask >> shift)) == PAGE_SHIFT - 3);
+
+        p[0] = (PAGE_SHIFT - 3ull) << shift;
+        p[1] = 0;
+        p[2] = 1ull << shift;
+        p[3] = 0;
+
+        for ( i = 4; i < PAGE_SIZE / 8; i += 4 )
+        {
+            p[i + 0] = (find_first_set_bit(i) + 0ull) << shift;
+            p[i + 1] = 0;
+            p[i + 2] = 1ull << shift;
+            p[i + 3] = 0;
+        }
+    }
+    else
+        clear_page(p);
 
     if ( hd->platform_ops->sync_cache )
         iommu_vcall(hd->platform_ops, sync_cache, p, PAGE_SIZE);
--- a/xen/include/asm-x86/iommu.h
+++ b/xen/include/asm-x86/iommu.h
@@ -142,7 +142,8 @@ int pi_update_irte(const struct pi_desc
 })
 
 int __must_check iommu_free_pgtables(struct domain *d);
-struct page_info *__must_check iommu_alloc_pgtable(struct domain *d);
+struct page_info *__must_check iommu_alloc_pgtable(struct domain *d,
+                                                   uint64_t contig_mask);
 void iommu_queue_free_pgtable(struct domain *d, struct page_info *pg);
 
 #endif /* !__ARCH_X86_IOMMU_H__ */