[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] p2m: Allow non-leaf entries to be replaced by leaf entries



# HG changeset patch
# User George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Date 1295624256 0
# Node ID e9277ab439472bbcf5717c82155361e6f97afa3a
# Parent  003acf02d416d657f750b7a7748fa8c5a932222c
p2m: Allow non-leaf entries to be replaced by leaf entries

Allow l2 and l3 p2m tables to be replaced with 2MB and 1GB pages
respectively, freeing the p2m table page properly.  This allows, for example,
a sequence of 512 singleton zero pages to be replaced with a superpage
populate-on-demand entry.

Changes:
* Add a p2m_free_ptp() corresponding to p2m_alloc_ptp(), which will
handle everything related to the freeing properly.
* Add p2m_free_entry(), based on ept_free_entry(), which will free
intermediate tables recursively.
* For both ept and p2m, when replacing non-leaf entries with leaf
entries, keep old entry and call *_free_entry() after new entry
has been written and proper flushes have been done.

Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
---
 xen/arch/x86/mm/hap/hap.c     |    8 +++--
 xen/arch/x86/mm/hap/p2m-ept.c |   26 ++++++++++++----
 xen/arch/x86/mm/p2m.c         |   65 ++++++++++++++++++++++++++++++++++++------
 xen/include/asm-x86/p2m.h     |    1 
 4 files changed, 82 insertions(+), 18 deletions(-)

diff -r 003acf02d416 -r e9277ab43947 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Thu Jan 20 17:04:06 2011 +0000
+++ b/xen/arch/x86/mm/hap/hap.c Fri Jan 21 15:37:36 2011 +0000
@@ -333,9 +333,11 @@ static void hap_free_p2m_page(struct dom
 
     ASSERT(page_get_owner(pg) == d);
     /* Should have just the one ref we gave it in alloc_p2m_page() */
-    if ( (pg->count_info & PGC_count_mask) != 1 )
-        HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
-                  pg->count_info, pg->u.inuse.type_info);
+    if ( (pg->count_info & PGC_count_mask) != 1 ) {
+        HAP_ERROR("Odd p2m page %p count c=%#lx t=%"PRtype_info"\n",
+                     pg, pg->count_info, pg->u.inuse.type_info);
+        WARN();
+    }
     pg->count_info &= ~PGC_count_mask;
     /* Free should not decrement domain's total allocation, since
      * these pages were allocated without an owner. */
diff -r 003acf02d416 -r e9277ab43947 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Thu Jan 20 17:04:06 2011 +0000
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Fri Jan 21 15:37:36 2011 +0000
@@ -166,8 +166,6 @@ static int ept_set_middle_entry(struct p
 /* free ept sub tree behind an entry */
 void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
 {
-    struct domain *d = p2m->domain;
-
     /* End if the entry is a leaf entry. */
     if ( level == 0 || !is_epte_present(ept_entry) ||
          is_epte_superpage(ept_entry) )
@@ -180,8 +178,8 @@ void ept_free_entry(struct p2m_domain *p
             ept_free_entry(p2m, epte + i, level - 1);
         unmap_domain_page(epte);
     }
-
-    d->arch.paging.free_page(d, mfn_to_page(ept_entry->mfn));
+    
+    p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn));
 }
 
 static int ept_split_super_page(struct p2m_domain *p2m, ept_entry_t *ept_entry,
@@ -317,6 +315,7 @@ ept_set_entry(struct p2m_domain *p2m, un
     int vtd_pte_present = 0;
     int needs_sync = 1;
     struct domain *d = p2m->domain;
+    ept_entry_t old_entry = { .epte = 0 };
 
     /*
      * the caller must make sure:
@@ -357,8 +356,12 @@ ept_set_entry(struct p2m_domain *p2m, un
     vtd_pte_present = is_epte_present(ept_entry) ? 1 : 0;
 
     /*
-     * When we are here, we must be on a leaf ept entry
-     * with i == target or i > target.
+     * If we're here with i > target, we must be at a leaf node, and
+     * we need to break up the superpage.
+     *
+     * If we're here with i == target and i > 0, we need to check to see
+     * if we're replacing a non-leaf entry (i.e., pointing to an N-1 table)
+     * with a leaf entry (a 1GiB or 2MiB page), and handle things appropriately.
      */
 
     if ( i == target )
@@ -369,6 +372,10 @@ ept_set_entry(struct p2m_domain *p2m, un
         /* No need to flush if the old entry wasn't valid */
         if ( !is_epte_present(ept_entry) )
             needs_sync = 0;
+
+        /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB),
+         * the intermediate tables will be freed below after the ept flush */
+        old_entry = *ept_entry;
 
         if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
              (p2mt == p2m_ram_paging_in_start) )
@@ -487,6 +494,13 @@ out:
         }
     }
 
+    /* Release the old intermediate tables, if any.  This has to be the
+       last thing we do, after the ept_sync_domain() and removal
+       from the iommu tables, so as to avoid a potential
+       use-after-free. */
+    if ( is_epte_present(&old_entry) )
+        ept_free_entry(p2m, &old_entry, target);
+
     return rv;
 }
 
diff -r 003acf02d416 -r e9277ab43947 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Thu Jan 20 17:04:06 2011 +0000
+++ b/xen/arch/x86/mm/p2m.c     Fri Jan 21 15:37:36 2011 +0000
@@ -153,9 +153,43 @@ p2m_alloc_ptp(struct p2m_domain *p2m, un
 
     page_list_add_tail(pg, &p2m->pages);
     pg->u.inuse.type_info = type | 1 | PGT_validated;
-    pg->count_info |= 1;
 
     return pg;
+}
+
+void
+p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
+{
+    ASSERT(pg);
+    ASSERT(p2m);
+    ASSERT(p2m->domain);
+    ASSERT(p2m->domain->arch.paging.free_page);
+
+    page_list_del(pg, &p2m->pages);
+    p2m->domain->arch.paging.free_page(p2m->domain, pg);
+
+    return;
+}
+
+/* Free intermediate tables from a p2m sub-tree */
+void
+p2m_free_entry(struct p2m_domain *p2m, l1_pgentry_t *p2m_entry, int page_order)
+{
+    /* End if the entry is a leaf entry. */
+    if ( page_order == 0
+         || !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT)
+         || (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+        return;
+
+    if ( page_order > 9 )
+    {
+        l1_pgentry_t *l3_table = map_domain_page(l1e_get_pfn(*p2m_entry));
+        for ( int i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+            p2m_free_entry(p2m, l3_table + i, page_order - 9);
+        unmap_domain_page(l3_table);
+    }
+
+    p2m_free_ptp(p2m, mfn_to_page(_mfn(l1e_get_pfn(*p2m_entry))));
 }
 
 // Walk one level of the P2M table, allocating a new table if required.
@@ -1316,6 +1350,7 @@ p2m_set_entry(struct p2m_domain *p2m, un
      */
     if ( page_order == 18 )
     {
+        l1_pgentry_t old_entry = l1e_empty();
         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                    L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                                    L3_PAGETABLE_ENTRIES);
@@ -1323,10 +1358,11 @@ p2m_set_entry(struct p2m_domain *p2m, un
         if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
              !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
         {
-            P2M_ERROR("configure P2M table L3 entry with large page\n");
-            domain_crash(p2m->domain);
-            goto out;
-        }
+            /* We're replacing a non-SP page with a superpage.  Make sure to
+             * handle freeing the table properly. */
+            old_entry = *p2m_entry;
+        }
+
         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
         l3e_content = mfn_valid(mfn) 
             ? l3e_from_pfn(mfn_x(mfn),
@@ -1335,7 +1371,11 @@ p2m_set_entry(struct p2m_domain *p2m, un
         entry_content.l1 = l3e_content.l3;
         paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
                                table_mfn, entry_content, 3);
-
+        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
+
+        /* Free old intermediate tables if necessary */
+        if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
+            p2m_free_entry(p2m, &old_entry, page_order);
     }
     /*
      * When using PAE Xen, we only allow 33 bits of pseudo-physical
@@ -1372,9 +1412,11 @@ p2m_set_entry(struct p2m_domain *p2m, un
         /* level 1 entry */
         paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
                                table_mfn, entry_content, 1);
+        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
     }
     else if ( page_order == 9 )
     {
+        l1_pgentry_t old_entry = l1e_empty();
         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                    L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                                    L2_PAGETABLE_ENTRIES);
@@ -1384,9 +1426,9 @@ p2m_set_entry(struct p2m_domain *p2m, un
         if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
              !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
         {
-            P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
-            domain_crash(p2m->domain);
-            goto out;
+            /* We're replacing a non-SP page with a superpage.  Make sure to
+             * handle freeing the table properly. */
+            old_entry = *p2m_entry;
         }
         
         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
@@ -1400,6 +1442,11 @@ p2m_set_entry(struct p2m_domain *p2m, un
         entry_content.l1 = l2e_content.l2;
         paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
                                table_mfn, entry_content, 2);
+        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
+
+        /* Free old intermediate tables if necessary */
+        if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
+            p2m_free_entry(p2m, &old_entry, page_order);
     }
 
     /* Track the highest gfn for which we have ever had a valid mapping */
diff -r 003acf02d416 -r e9277ab43947 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Thu Jan 20 17:04:06 2011 +0000
+++ b/xen/include/asm-x86/p2m.h Fri Jan 21 15:37:36 2011 +0000
@@ -541,6 +541,7 @@ static inline void p2m_mem_access_check(
 #endif
 
 struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type);
+void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg);
 
 #endif /* _XEN_P2M_H */
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.