[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] VMX: fix ept pages free up when ept superpage split fails.


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • From: "Li, Xin" <xin.li@xxxxxxxxx>
  • Date: Sat, 3 Jul 2010 13:37:01 +0800
  • Accept-language: zh-CN, en-US
  • Acceptlanguage: zh-CN, en-US
  • Delivery-date: Fri, 02 Jul 2010 22:38:16 -0700
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>
  • Thread-index: AcsaccLPVpfp/45HQ1SYN6gRMYJaRg==
  • Thread-topic: [PATCH] VMX: fix ept pages free up when ept superpage split fails.

VMX: fix EPT page freeing when an EPT superpage split fails:
1) implement the EPT superpage split in a recursive way to
   form an EPT sub-tree before the real installation;
2) free an EPT sub-tree also in a recursive way;
3) change ept_next_level()'s last input parameter from the
   number of shift bits to the next walk level.

Signed-off-by: Xin Li <xin.li@xxxxxxxxx>

diff -r f483b5ce7be2 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c     Fri Jul 02 19:04:57 2010 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c     Sat Jul 03 21:29:56 2010 +0800
@@ -118,6 +118,74 @@
     return 1;
 }
 
+/* Free the EPT sub-tree behind an entry. */
+void ept_free_entry(struct domain *d, ept_entry_t *ept_entry, int level)
+{
+    /* End if the entry is a leaf entry. */
+    if ( level == 0 || !is_epte_present(ept_entry) ||
+         is_epte_superpage(ept_entry) )
+        return;
+
+    if ( level > 1 )
+    {
+        ept_entry_t *epte = map_domain_page(ept_entry->mfn);
+        for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+            ept_free_entry(d, epte + i, level - 1);
+        unmap_domain_page(epte);
+    }
+
+    d->arch.p2m->free_page(d, mfn_to_page(ept_entry->mfn));
+}
+
+static int ept_split_super_page(struct domain *d, ept_entry_t *ept_entry,
+                                int level, int target)
+{
+    ept_entry_t new_ept, *table;
+    uint64_t trunk;
+    int rv = 1;
+
+    /* End if the entry is a leaf entry or reaches the target level. */
+    if ( level == 0 || level == target )
+        return rv;
+
+    ASSERT(is_epte_superpage(ept_entry));
+
+    if ( !ept_set_middle_entry(d, &new_ept) )
+        return 0;
+
+    table = map_domain_page(new_ept.mfn);
+    trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);
+
+    for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+    {
+        ept_entry_t *epte = table + i;
+
+        epte->emt = ept_entry->emt;
+        epte->ipat = ept_entry->ipat;
+        epte->sp = (level > 1) ? 1 : 0;
+        epte->avail1 = ept_entry->avail1;
+        epte->avail2 = 0;
+        epte->mfn = ept_entry->mfn + i * trunk;
+
+        ept_p2m_type_to_flags(epte, epte->avail1);
+
+        if ( (level - 1) == target )
+            continue;
+
+        ASSERT(is_epte_superpage(epte));
+
+        if ( !(rv = ept_split_super_page(d, epte, level - 1, target)) )
+            break;
+    }
+
+    unmap_domain_page(table);
+
+    /* Even on failure we should install the newly allocated EPT page. */
+    *ept_entry = new_ept;
+
+    return rv;
+}
+
 /* Take the currently mapped table, find the corresponding gfn entry,
  * and map the next table, if available.  If the entry is empty
  * and read_only is set, 
@@ -134,14 +202,18 @@
  */
 static int ept_next_level(struct domain *d, bool_t read_only,
                           ept_entry_t **table, unsigned long *gfn_remainder,
-                          u32 shift)
+                          int next_level)
 {
     ept_entry_t *ept_entry;
-    ept_entry_t *next;
-    u32 index;
+    u32 shift, index;
+
+    shift = next_level * EPT_TABLE_ORDER;
 
     index = *gfn_remainder >> shift;
 
+    /* The index must fall within the page. */
+    ASSERT(index < EPT_PAGETABLE_ENTRIES);
+
     ept_entry = (*table) + index;
 
     if ( !is_epte_present(ept_entry) )
@@ -161,69 +233,15 @@
         return GUEST_TABLE_SUPER_PAGE;
     else
     {
+        unsigned long mfn = ept_entry->mfn;
+
+        unmap_domain_page(*table);
+        *table = map_domain_page(mfn);
         *gfn_remainder &= (1UL << shift) - 1;
-        next = map_domain_page(ept_entry->mfn);
-        unmap_domain_page(*table);
-        *table = next;
         return GUEST_TABLE_NORMAL_PAGE;
     }
 }
 
-/* It's super page before and we should break down it now. */
-static int ept_split_large_page(struct domain *d,
-                                ept_entry_t **table, u32 *index,
-                                unsigned long gfn, int level)
-{
-    ept_entry_t *prev_table = *table;
-    ept_entry_t *split_table = NULL;
-    ept_entry_t *split_entry = NULL;
-    ept_entry_t *ept_entry = (*table) + (*index);
-    ept_entry_t temp_ept_entry;
-    unsigned long s_gfn, s_mfn;
-    unsigned long offset, trunk;
-    int i;
-
-    /* alloc new page for new ept middle level entry which is
-     * before a leaf super entry
-     */
-
-    if ( !ept_set_middle_entry(d, &temp_ept_entry) )
-        return 0;
-
-    /* split the super page to small next level pages */
-    split_table = map_domain_page(temp_ept_entry.mfn);
-    offset = gfn & ((1UL << (level * EPT_TABLE_ORDER)) - 1);
-    trunk = (1UL << ((level-1) * EPT_TABLE_ORDER));
-
-    for ( i = 0; i < (1UL << EPT_TABLE_ORDER); i++ )
-    {
-        s_gfn = gfn - offset + i * trunk;
-        s_mfn = ept_entry->mfn + i * trunk;
-
-        split_entry = split_table + i;
-        split_entry->emt = ept_entry->emt;
-        split_entry->ipat = ept_entry->ipat;
-
-        split_entry->sp = (level > 1) ? 1 : 0;
-
-        split_entry->mfn = s_mfn;
-
-        split_entry->avail1 = ept_entry->avail1;
-        split_entry->avail2 = 0;
-        /* last step */
-        split_entry->r = split_entry->w = split_entry->x = 1;
-        ept_p2m_type_to_flags(split_entry, ept_entry->avail1);
-    }
-
-    *ept_entry = temp_ept_entry;
-    
-    *index = offset / trunk;
-    *table = split_table;
-    unmap_domain_page(prev_table);
-
-    return 1;
-}
-
 /*
  * ept_set_entry() computes 'need_modify_vtd_table' for itself,
  * by observing whether any gfn->mfn translations are modified.
@@ -265,7 +283,7 @@
 
     for ( i = ept_get_wl(d); i > target; i-- )
     {
-        ret = ept_next_level(d, 0, &table, &gfn_remainder, i * 
EPT_TABLE_ORDER);
+        ret = ept_next_level(d, 0, &table, &gfn_remainder, i);
         if ( !ret )
             goto out;
         else if ( ret != GUEST_TABLE_NORMAL_PAGE )
@@ -275,12 +293,10 @@
     ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target);
 
     index = gfn_remainder >> (i * EPT_TABLE_ORDER);
-    gfn_remainder &= (1UL << (i * EPT_TABLE_ORDER)) - 1;
+    offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1);
 
     ept_entry = table + index;
 
-    offset = gfn_remainder;
-
     /*
      * When we are here, we must be on a leaf ept entry
      * with i == target or i > target.
@@ -301,15 +317,14 @@
                                                 direct_mmio);
             ept_entry->ipat = ipat;
             ept_entry->sp = order ? 1 : 0;
+            ept_entry->avail1 = p2mt;
+            ept_entry->avail2 = 0;
 
             if ( ept_entry->mfn == mfn_x(mfn) )
                 need_modify_vtd_table = 0;
             else
                 ept_entry->mfn = mfn_x(mfn);
 
-            ept_entry->avail1 = p2mt;
-            ept_entry->avail2 = 0;
-
             ept_p2m_type_to_flags(ept_entry, p2mt);
         }
         else
@@ -318,33 +333,50 @@
     else
     {
         /* We need to split the original page. */
-        ept_entry_t *split_ept_entry;
+        ept_entry_t split_ept_entry;
 
         ASSERT(is_epte_superpage(ept_entry));
 
-        for ( ; i > target; i-- )
+        split_ept_entry = *ept_entry;
+
+        if ( !ept_split_super_page(d, &split_ept_entry, i, target) )
         {
-            rv = ept_split_large_page(d, &table, &index, gfn, i);
-            if ( !rv )
-                goto out;
+            ept_free_entry(d, &split_ept_entry, i);
+            goto out;
         }
 
-        split_ept_entry = table + index;
-        split_ept_entry->avail1 = p2mt;
-        ept_p2m_type_to_flags(split_ept_entry, p2mt);
-        split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat,
-                                                  direct_mmio);
-        split_ept_entry->ipat = ipat;
+        /* now install the newly split ept sub-tree */
+        /* NB: make sure the domain is paused and there is no in-flight VT-d DMA. */
+        *ept_entry = split_ept_entry;
 
-        if ( split_ept_entry->mfn == mfn_x(mfn) )
-            need_modify_vtd_table = 0;
-        else
-            split_ept_entry->mfn = mfn_x(mfn);
+        /* then move to the level we want to make real changes */
+        for ( ; i > target; i-- )
+            ept_next_level(d, 0, &table, &gfn_remainder, i);
+
+        ASSERT(i == target);
+
+        index = gfn_remainder >> (i * EPT_TABLE_ORDER);
+        offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1);
+
+        ept_entry = table + index;
+
+        ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio);
+        ept_entry->ipat = ipat;
+        ept_entry->sp = i ? 1 : 0;
+        ept_entry->avail1 = p2mt;
+        ept_entry->avail2 = 0;
+
+        if ( ept_entry->mfn == mfn_x(mfn) )
+             need_modify_vtd_table = 0;
+        else /* the caller should take care of the previous page */
+            ept_entry->mfn = mfn_x(mfn);
+
+        ept_p2m_type_to_flags(ept_entry, p2mt);
     }
 
     /* Track the highest gfn for which we have ever had a valid mapping */
-    if ( mfn_valid(mfn_x(mfn))
-         && (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
+    if ( mfn_valid(mfn_x(mfn)) &&
+         (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
         d->arch.p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
 
     /* Success */
@@ -366,11 +398,11 @@
                 for ( i = 0; i < (1 << order); i++ )
                     iommu_map_page(
                         d, gfn - offset + i, mfn_x(mfn) - offset + i,
-                        IOMMUF_readable|IOMMUF_writable);
+                        IOMMUF_readable | IOMMUF_writable);
             }
             else if ( !order )
                 iommu_map_page(
-                    d, gfn, mfn_x(mfn), IOMMUF_readable|IOMMUF_writable);
+                    d, gfn, mfn_x(mfn), IOMMUF_readable | IOMMUF_writable);
         }
         else
         {
@@ -410,8 +442,7 @@
     for ( i = ept_get_wl(d); i > 0; i-- )
     {
     retry:
-        ret = ept_next_level(d, 1, &table, &gfn_remainder,
-                             i * EPT_TABLE_ORDER);
+        ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
         if ( !ret )
             goto out;
         else if ( ret == GUEST_TABLE_POD_PAGE )
@@ -498,8 +529,7 @@
 
     for ( i = ept_get_wl(d); i > 0; i-- )
     {
-        ret = ept_next_level(d, 1, &table, &gfn_remainder,
-                             i * EPT_TABLE_ORDER);
+        ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
         if ( !ret || ret == GUEST_TABLE_POD_PAGE )
             goto out;
         else if ( ret == GUEST_TABLE_SUPER_PAGE )
@@ -722,8 +752,7 @@
 
             for ( i = ept_get_wl(d); i > 0; i-- )
             {
-                ret = ept_next_level(d, 1, &table, &gfn_remainder,
-                                     i * EPT_TABLE_ORDER);
+                ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
                 if ( ret != GUEST_TABLE_NORMAL_PAGE )
                     break;
             }

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.