[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] p2m: Allow non-leaf entries to be replaced by leaf entries



# HG changeset patch
# User George Dunlap <george.dunlap@xxxxxxxxxxxxx>
# Date 1295624256 0
# Node ID 9ca9331c97808b6aa02697619743cf6e8030587a
# Parent  003acf02d416d657f750b7a7748fa8c5a932222c
p2m: Allow non-leaf entries to be replaced by leaf entries

Allow l2 and l3 p2m tables to be replaced with 2MB and 1GB pages
respectively, freeing the p2m table page properly.  This allows, for example,
a sequence of 512 singleton zero pages to be replaced with a superpage
populate-on-demand entry.

Changes:
* Add a p2m_free_ptp() corresponding to p2m_alloc_ptp(), which will
handle everything related to the freeing properly.
* Add p2m_free_entry(), based on ept_free_entry(), which will free
intermediate tables recursively.
* For both ept and p2m, when replacing non-leaf entries with leaf
entries, keep old entry and call *_free_entry() after new entry
has been written and proper flushes have been done.

Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

diff -r 003acf02d416 -r 9ca9331c9780 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Thu Jan 20 17:04:06 2011 +0000
+++ b/xen/arch/x86/mm/hap/hap.c Fri Jan 21 15:37:36 2011 +0000
@@ -333,9 +333,11 @@
 
     ASSERT(page_get_owner(pg) == d);
     /* Should have just the one ref we gave it in alloc_p2m_page() */
-    if ( (pg->count_info & PGC_count_mask) != 1 )
-        HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
-                  pg->count_info, pg->u.inuse.type_info);
+    if ( (pg->count_info & PGC_count_mask) != 1 ) {
+        HAP_ERROR("Odd p2m page %p count c=%#lx t=%"PRtype_info"\n",
+                     pg, pg->count_info, pg->u.inuse.type_info);
+        WARN();
+    }
     pg->count_info &= ~PGC_count_mask;
     /* Free should not decrement domain's total allocation, since
      * these pages were allocated without an owner. */
diff -r 003acf02d416 -r 9ca9331c9780 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Thu Jan 20 17:04:06 2011 +0000
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Fri Jan 21 15:37:36 2011 +0000
@@ -166,8 +166,6 @@
 /* free ept sub tree behind an entry */
 void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
 {
-    struct domain *d = p2m->domain;
-
     /* End if the entry is a leaf entry. */
     if ( level == 0 || !is_epte_present(ept_entry) ||
          is_epte_superpage(ept_entry) )
@@ -180,8 +178,8 @@
             ept_free_entry(p2m, epte + i, level - 1);
         unmap_domain_page(epte);
     }
-
-    d->arch.paging.free_page(d, mfn_to_page(ept_entry->mfn));
+    
+    p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn));
 }
 
 static int ept_split_super_page(struct p2m_domain *p2m, ept_entry_t *ept_entry,
@@ -317,6 +315,7 @@
     int vtd_pte_present = 0;
     int needs_sync = 1;
     struct domain *d = p2m->domain;
+    ept_entry_t old_entry = { .epte = 0 };
 
     /*
      * the caller must make sure:
@@ -357,8 +356,12 @@
     vtd_pte_present = is_epte_present(ept_entry) ? 1 : 0;
 
     /*
-     * When we are here, we must be on a leaf ept entry
-     * with i == target or i > target.
+     * If we're here with i > target, we must be at a leaf node, and
+     * we need to break up the superpage.
+     *
+     * If we're here with i == target and i > 0, we need to check to see
+     * if we're replacing a non-leaf entry (i.e., pointing to an N-1 table)
+     * with a leaf entry (a 1GiB or 2MiB page), and handle things 
appropriately.
      */
 
     if ( i == target )
@@ -370,6 +373,10 @@
         if ( !is_epte_present(ept_entry) )
             needs_sync = 0;
 
+        /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 
2MiB),
+         * the intermediate tables will be freed below after the ept flush */
+        old_entry = *ept_entry;
+
         if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
              (p2mt == p2m_ram_paging_in_start) )
         {
@@ -487,6 +494,13 @@
         }
     }
 
+    /* Release the old intermediate tables, if any.  This has to be the
+       last thing we do, after the ept_sync_domain() and removal
+       from the iommu tables, so as to avoid a potential
+       use-after-free. */
+    if ( is_epte_present(&old_entry) )
+        ept_free_entry(p2m, &old_entry, target);
+
     return rv;
 }
 
diff -r 003acf02d416 -r 9ca9331c9780 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Thu Jan 20 17:04:06 2011 +0000
+++ b/xen/arch/x86/mm/p2m.c     Fri Jan 21 15:37:36 2011 +0000
@@ -153,11 +153,45 @@
 
     page_list_add_tail(pg, &p2m->pages);
     pg->u.inuse.type_info = type | 1 | PGT_validated;
-    pg->count_info |= 1;
 
     return pg;
 }
 
+void
+p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
+{
+    ASSERT(pg);
+    ASSERT(p2m);
+    ASSERT(p2m->domain);
+    ASSERT(p2m->domain->arch.paging.free_page);
+
+    page_list_del(pg, &p2m->pages);
+    p2m->domain->arch.paging.free_page(p2m->domain, pg);
+
+    return;
+}
+
+/* Free intermediate tables from a p2m sub-tree */
+void
+p2m_free_entry(struct p2m_domain *p2m, l1_pgentry_t *p2m_entry, int page_order)
+{
+    /* End if the entry is a leaf entry. */
+    if ( page_order == 0
+         || !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT)
+         || (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+        return;
+
+    if ( page_order > 9 )
+    {
+        l1_pgentry_t *l3_table = map_domain_page(l1e_get_pfn(*p2m_entry));
+        for ( int i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+            p2m_free_entry(p2m, l3_table + i, page_order - 9);
+        unmap_domain_page(l3_table);
+    }
+
+    p2m_free_ptp(p2m, mfn_to_page(l1e_get_pfn(*p2m_entry)));
+}
+
 // Walk one level of the P2M table, allocating a new table if required.
 // Returns 0 on error.
 //
@@ -1316,6 +1350,7 @@
      */
     if ( page_order == 18 )
     {
+        l1_pgentry_t old_entry = { .l1=0 };
         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                    L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                                    L3_PAGETABLE_ENTRIES);
@@ -1323,10 +1358,11 @@
         if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
              !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
         {
-            P2M_ERROR("configure P2M table L3 entry with large page\n");
-            domain_crash(p2m->domain);
-            goto out;
+            /* We're replacing a non-SP page with a superpage.  Make sure to
+             * handle freeing the table properly. */
+            old_entry = *p2m_entry;
         }
+
         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
         l3e_content = mfn_valid(mfn) 
             ? l3e_from_pfn(mfn_x(mfn),
@@ -1335,7 +1371,11 @@
         entry_content.l1 = l3e_content.l3;
         paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
                                table_mfn, entry_content, 3);
-
+        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
+
+        /* Free old intermediate tables if necessary */
+        if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
+            p2m_free_entry(p2m, &old_entry, page_order);
     }
     /*
      * When using PAE Xen, we only allow 33 bits of pseudo-physical
@@ -1372,9 +1412,11 @@
         /* level 1 entry */
         paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
                                table_mfn, entry_content, 1);
+        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
     }
     else if ( page_order == 9 )
     {
+        l1_pgentry_t old_entry = { .l1=0 };
         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                    L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                                    L2_PAGETABLE_ENTRIES);
@@ -1384,9 +1426,9 @@
         if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
              !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
         {
-            P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
-            domain_crash(p2m->domain);
-            goto out;
+            /* We're replacing a non-SP page with a superpage.  Make sure to
+             * handle freeing the table properly. */
+            old_entry = *p2m_entry;
         }
         
         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
@@ -1400,6 +1442,11 @@
         entry_content.l1 = l2e_content.l2;
         paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
                                table_mfn, entry_content, 2);
+        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
+
+        /* Free old intermediate tables if necessary */
+        if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
+            p2m_free_entry(p2m, &old_entry, page_order);
     }
 
     /* Track the highest gfn for which we have ever had a valid mapping */
diff -r 003acf02d416 -r 9ca9331c9780 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Thu Jan 20 17:04:06 2011 +0000
+++ b/xen/include/asm-x86/p2m.h Fri Jan 21 15:37:36 2011 +0000
@@ -541,6 +541,7 @@
 #endif
 
 struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type);
+void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg);
 
 #endif /* _XEN_P2M_H */
 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.