[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v5 09/15] AMD/IOMMU: free all-empty page tables


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Fri, 27 May 2022 13:18:43 +0200
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=i1SRbMZ5vtUn4A75NysmPVCeWEjl1KBkL8Pu3NNkQeY=; b=M7fWYVj5h67HGeunumVuSpEDuQJLlblXqTV5AF6qd0iJdl3Gd5pRr6tVu23xUnj2y1X5lh7t+gOnLRbXieLyIkEdK5ZyB0cEPVWpejcrRh02Boi8CGzZHPXXmoIWhEMmOzpec2xDnagIXhK4WcG2t5hMhiCYfKyX2sJTY+ADGg4JNyfpnoeAQd80DhW3ooJVtF1FgYZlSz6Ao99p7NvnWjCym2uPgpk5A2of/WPpSR5bgk2aW6/BtS3g9y7PPHPNdc+EsOaACbqwAwcNTCqUGYGDFqEmae5BlpleODJWoEN3VA9Qkze3f4RUJ0S2vjJpavOvY8PfdmueU8Dk/CaAOQ==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=hQ1jIvc5l9Tv/CeQtecHdQ8E7lGXuclAN7bJWF+PruSxY3que6ueVvGnYZ+aVgpXhxmn99wB8VppE56/vyGZQn6FaVO4l26y+RkCPr7lOCM16Yu6bj+KLyohvJ+QM0f6fY/jHAz/vFtEE7wtnvvnnQq+4Jn2sEEjdYKv2cRzCZBugYGwm6aRjQ12NoKgPdxIycfkenVTsSy5EtpBm8KA5s1nrj9NmdCXtJYuhwcVZujUXKBeLg89NFq2+chuPUn3teIQehfcmY538qEHloMXSVYt+7sSm8NwRWSTr4MnxSZt5EUdsr05HQLO8dxXUpafVVH5qR5pir8LRCEZNhSkSQ==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=suse.com;
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Paul Durrant <paul@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Delivery-date: Fri, 27 May 2022 11:18:51 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

When a page table ends up with no present entries left, it can be
replaced by a non-present entry at the next higher level. The page table
itself can then be scheduled for freeing.

Note that while its output isn't used there yet,
pt_update_contig_markers() right away needs to be called in all places
where entries get updated, not just the one where entries get cleared.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
Reviewed-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
---
v5: Re-base over changes earlier in the series.
v4: Re-base over changes earlier in the series.
v3: Re-base over changes earlier in the series.
v2: New.

--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -21,6 +21,7 @@
 
 #include "iommu.h"
 
+#define CONTIG_MASK IOMMU_PTE_CONTIG_MASK
 #include <asm/pt-contig-markers.h>
 
 /* Given pfn and page table level, return pde index */
@@ -35,16 +36,20 @@ static unsigned int pfn_to_pde_idx(unsig
 
 static union amd_iommu_pte clear_iommu_pte_present(unsigned long l1_mfn,
                                                    unsigned long dfn,
-                                                   unsigned int level)
+                                                   unsigned int level,
+                                                   bool *free)
 {
     union amd_iommu_pte *table, *pte, old;
+    unsigned int idx = pfn_to_pde_idx(dfn, level);
 
     table = map_domain_page(_mfn(l1_mfn));
-    pte = &table[pfn_to_pde_idx(dfn, level)];
+    pte = &table[idx];
     old = *pte;
 
     write_atomic(&pte->raw, 0);
 
+    *free = pt_update_contig_markers(&table->raw, idx, level, PTE_kind_null);
+
     unmap_domain_page(table);
 
     return old;
@@ -87,7 +92,11 @@ static union amd_iommu_pte set_iommu_pte
     if ( !old.pr || old.next_level ||
          old.mfn != next_mfn ||
          old.iw != iw || old.ir != ir )
+    {
         set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
+        pt_update_contig_markers(&table->raw, pfn_to_pde_idx(dfn, level),
+                                 level, PTE_kind_leaf);
+    }
     else
         old.pr = false; /* signal "no change" to the caller */
 
@@ -326,6 +335,9 @@ static int iommu_pde_from_dfn(struct dom
             smp_wmb();
             set_iommu_pde_present(pde, next_table_mfn, next_level, true,
                                   true);
+            pt_update_contig_markers(&next_table_vaddr->raw,
+                                     pfn_to_pde_idx(dfn, level),
+                                     level, PTE_kind_table);
 
             *flush_flags |= IOMMU_FLUSHF_modified;
         }
@@ -351,6 +363,9 @@ static int iommu_pde_from_dfn(struct dom
                 next_table_mfn = mfn_x(page_to_mfn(table));
                 set_iommu_pde_present(pde, next_table_mfn, next_level, true,
                                       true);
+                pt_update_contig_markers(&next_table_vaddr->raw,
+                                         pfn_to_pde_idx(dfn, level),
+                                         level, PTE_kind_table);
             }
             else /* should never reach here */
             {
@@ -487,8 +502,24 @@ int cf_check amd_iommu_unmap_page(
 
     if ( pt_mfn )
     {
+        bool free;
+
         /* Mark PTE as 'page not present'. */
-        old = clear_iommu_pte_present(pt_mfn, dfn_x(dfn), level);
+        old = clear_iommu_pte_present(pt_mfn, dfn_x(dfn), level, &free);
+
+        while ( unlikely(free) && ++level < hd->arch.amd.paging_mode )
+        {
+            struct page_info *pg = mfn_to_page(_mfn(pt_mfn));
+
+            if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn,
+                                    flush_flags, false) )
+                BUG();
+            BUG_ON(!pt_mfn);
+
+            clear_iommu_pte_present(pt_mfn, dfn_x(dfn), level, &free);
+            *flush_flags |= IOMMU_FLUSHF_all;
+            iommu_queue_free_pgtable(hd, pg);
+        }
     }
 
     spin_unlock(&hd->arch.mapping_lock);




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.