[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen staging-4.13] x86/pv: Track and flush non-coherent mappings of RAM



commit f9ae12fc103c7429d8ce6bc57b8cbcefdd71cd45
Author:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
AuthorDate: Thu Jun 9 17:16:06 2022 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Jun 9 17:16:06 2022 +0200

    x86/pv: Track and flush non-coherent mappings of RAM
    
    There are legitimate uses of WC mappings of RAM, e.g. for DMA buffers with
    devices that make non-coherent writes.  The Linux sound subsystem makes
    extensive use of this technique.
    
    For such usecases, the guest's DMA buffer is mapped and consistently used as
    WC, and Xen doesn't interact with the buffer.
    
    However, a mischevious guest can use WC mappings to deliberately create
    non-coherency between the cache and RAM, and use this to trick Xen into
    validating a pagetable which isn't actually safe.
    
    Allocate a new PGT_non_coherent to track the non-coherency of mappings.  Set
    it whenever a non-coherent writeable mapping is created.  If the page is 
used
    as anything other than PGT_writable_page, force a cache flush before
    validation.  Also force a cache flush before the page is returned to the 
heap.
    
    This is CVE-2022-26364, part of XSA-402.
    
    Reported-by: Jann Horn <jannh@xxxxxxxxxx>
    Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Reviewed-by: George Dunlap <george.dunlap@xxxxxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
    master commit: c1c9cae3a9633054b177c5de21ad7268162b2f2c
    master date: 2022-06-09 14:23:37 +0200
---
 xen/arch/x86/mm.c             | 38 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/pv/grant_table.c | 21 +++++++++++++++++++++
 xen/include/asm-x86/mm.h      |  6 +++++-
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 859646b670..f5eeddce58 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1000,6 +1000,15 @@ get_page_from_l1e(
         return -EACCES;
     }
 
+    /*
+     * Track writeable non-coherent mappings to RAM pages, to trigger a cache
+     * flush later if the target is used as anything but a PGT_writeable page.
+     * We care about all writeable mappings, including foreign mappings.
+     */
+    if ( !boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) &&
+         (l1f & (PAGE_CACHE_ATTRS | _PAGE_RW)) == (_PAGE_WC | _PAGE_RW) )
+        set_bit(_PGT_non_coherent, &page->u.inuse.type_info);
+
     return 0;
 
  could_not_pin:
@@ -2532,6 +2541,19 @@ static int cleanup_page_mappings(struct page_info *page)
         }
     }
 
+    /*
+     * Flush the cache if there were previously non-coherent writeable
+     * mappings of this page.  This forces the page to be coherent before it
+     * is freed back to the heap.
+     */
+    if ( __test_and_clear_bit(_PGT_non_coherent, &page->u.inuse.type_info) )
+    {
+        void *addr = __map_domain_page(page);
+
+        cache_flush(addr, PAGE_SIZE);
+        unmap_domain_page(addr);
+    }
+
     return rc;
 }
 
@@ -3089,6 +3111,22 @@ static int _get_page_type(struct page_info *page, 
unsigned long type,
 
     if ( unlikely(!(nx & PGT_validated)) )
     {
+        /*
+         * Flush the cache if there were previously non-coherent mappings of
+         * this page, and we're trying to use it as anything other than a
+         * writeable page.  This forces the page to be coherent before we
+         * validate its contents for safety.
+         */
+        if ( (nx & PGT_non_coherent) && type != PGT_writable_page )
+        {
+            void *addr = __map_domain_page(page);
+
+            cache_flush(addr, PAGE_SIZE);
+            unmap_domain_page(addr);
+
+            page->u.inuse.type_info &= ~PGT_non_coherent;
+        }
+
         /*
          * No special validation needed for writable or shared pages.  Page
          * tables and GDT/LDT need to have their contents audited.
diff --git a/xen/arch/x86/pv/grant_table.c b/xen/arch/x86/pv/grant_table.c
index 0325618c98..81c72e61ed 100644
--- a/xen/arch/x86/pv/grant_table.c
+++ b/xen/arch/x86/pv/grant_table.c
@@ -109,7 +109,17 @@ int create_grant_pv_mapping(uint64_t addr, mfn_t frame,
 
     ol1e = *pl1e;
     if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) )
+    {
+        /*
+         * We always create mappings in this path.  However, our caller,
+         * map_grant_ref(), only passes potentially non-zero cache_flags for
+         * MMIO frames, so this path doesn't create non-coherent mappings of
+         * RAM frames and there's no need to calculate PGT_non_coherent.
+         */
+        ASSERT(!cache_flags || is_iomem_page(frame));
+
         rc = GNTST_okay;
+    }
 
  out_unlock:
     page_unlock(page);
@@ -294,7 +304,18 @@ int replace_grant_pv_mapping(uint64_t addr, mfn_t frame,
                  l1e_get_flags(ol1e), addr, grant_pte_flags);
 
     if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) )
+    {
+        /*
+         * Generally, replace_grant_pv_mapping() is used to destroy mappings
+         * (n1le = l1e_empty()), but it can be a present mapping on the
+         * GNTABOP_unmap_and_replace path.
+         *
+         * In such cases, the PTE is fully transplanted from its old location
+         * via steal_linear_addr(), so we need not perform PGT_non_coherent
+         * checking here.
+         */
         rc = GNTST_okay;
+    }
 
  out_unlock:
     page_unlock(page);
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index db09849f73..82d0fd6104 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -48,8 +48,12 @@
 #define _PGT_partial      PG_shift(8)
 #define PGT_partial       PG_mask(1, 8)
 
+/* Has this page been mapped writeable with a non-coherent memory type? */
+#define _PGT_non_coherent PG_shift(9)
+#define PGT_non_coherent  PG_mask(1, 9)
+
  /* Count of uses of this frame as its current type. */
-#define PGT_count_width   PG_shift(8)
+#define PGT_count_width   PG_shift(9)
 #define PGT_count_mask    ((1UL<<PGT_count_width)-1)
 
 /* Are the 'type mask' bits identical? */
--
generated by git-patchbot for /home/xen/git/xen.git#staging-4.13



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.