[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen staging] xen/x86: p2m: Add preemption in p2m_teardown()



commit 8a2111250b424edc49c65c4d41b276766d30635c
Author:     Julien Grall <jgrall@xxxxxxxxxx>
AuthorDate: Tue Oct 11 14:24:48 2022 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Oct 11 14:24:48 2022 +0200

    xen/x86: p2m: Add preemption in p2m_teardown()
    
    The list p2m->pages contain all the pages used by the P2M. On large
    instance this can be quite large and the time spent to call
    d->arch.paging.free_page() will take more than 1ms for a 80GB guest
    on a Xen running in nested environment on a c5.metal.
    
    By extrapolation, it would take > 100ms for a 8TB guest (what we
    current security support). So add some preemption in p2m_teardown()
    and propagate to the callers. Note there are 3 places where
    the preemption is not enabled:
        - hap_final_teardown()/shadow_final_teardown(): We are
          preventing update the P2M once the domain is dying (so
          no more pages could be allocated) and most of the P2M pages
          will be freed in preemptive manneer when relinquishing the
          resources. So this is fine to disable preemption.
        - shadow_enable(): This is fine because it will undo the allocation
          that may have been made by p2m_alloc_table() (so only the root
          page table).
    
    The preemption is arbitrarily checked every 1024 iterations.
    
    We now need to include <xen/event.h> in p2m-basic in order to
    import the definition for local_events_need_delivery() used by
    general_preempt_check(). Ideally, the inclusion should happen in
    xen/sched.h but it opened a can of worms.
    
    Note that with the current approach, Xen doesn't keep track on whether
    the alt/nested P2Ms have been cleared. So there are some redundant work.
    However, this is not expected to incurr too much overhead (the P2M lock
    shouldn't be contended during teardown). So this is optimization is
    left outside of the security event.
    
    This is part of CVE-2022-33746 / XSA-410.
    
    Signed-off-by: Julien Grall <jgrall@xxxxxxxxxx>
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    ----
    Changes since v12:
        - Correct altp2m preemption check placement.
    
    Changes since v9:
        - Integrate patch into series.
    
    Changes since v2:
        - Rework the loop doing the preemption
        - Add a comment in shadow_enable() to explain why p2m_teardown()
          doesn't need to be preemptible.
    
    Changes since v1:
        - Update the commit message
        - Rebase on top of Roger's v8 series
        - Fix preemption check
        - Use 'unsigned int' rather than 'unsigned long' for the counter
---
 xen/arch/x86/include/asm/p2m.h  |  2 +-
 xen/arch/x86/mm/hap/hap.c       | 22 ++++++++++++++++------
 xen/arch/x86/mm/p2m-basic.c     | 19 ++++++++++++++++---
 xen/arch/x86/mm/shadow/common.c | 12 +++++++++---
 4 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/xen/arch/x86/include/asm/p2m.h b/xen/arch/x86/include/asm/p2m.h
index bafbd96052..bd684d02f3 100644
--- a/xen/arch/x86/include/asm/p2m.h
+++ b/xen/arch/x86/include/asm/p2m.h
@@ -600,7 +600,7 @@ int p2m_init(struct domain *d);
 int p2m_alloc_table(struct p2m_domain *p2m);
 
 /* Return all the p2m resources to Xen. */
-void p2m_teardown(struct p2m_domain *p2m, bool remove_root);
+void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted);
 void p2m_final_teardown(struct domain *d);
 
 /* Add/remove a page to/from a domain's p2m table. */
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index d058050d63..f809ea9aa6 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -548,17 +548,17 @@ void hap_final_teardown(struct domain *d)
 
     if ( hvm_altp2m_supported() )
         for ( i = 0; i < MAX_ALTP2M; i++ )
-            p2m_teardown(d->arch.altp2m_p2m[i], true);
+            p2m_teardown(d->arch.altp2m_p2m[i], true, NULL);
 
     /* Destroy nestedp2m's first */
     for (i = 0; i < MAX_NESTEDP2M; i++) {
-        p2m_teardown(d->arch.nested_p2m[i], true);
+        p2m_teardown(d->arch.nested_p2m[i], true, NULL);
     }
 
     if ( d->arch.paging.hap.total_pages != 0 )
         hap_teardown(d, NULL);
 
-    p2m_teardown(p2m_get_hostp2m(d), true);
+    p2m_teardown(p2m_get_hostp2m(d), true, NULL);
     /* Free any memory that the p2m teardown released */
     paging_lock(d);
     hap_set_allocation(d, 0, NULL);
@@ -612,14 +612,24 @@ void hap_teardown(struct domain *d, bool *preempted)
         FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp);
 
         for ( i = 0; i < MAX_ALTP2M; i++ )
-            p2m_teardown(d->arch.altp2m_p2m[i], false);
+        {
+            p2m_teardown(d->arch.altp2m_p2m[i], false, preempted);
+            if ( preempted && *preempted )
+                return;
+        }
     }
 
     /* Destroy nestedp2m's after altp2m. */
     for ( i = 0; i < MAX_NESTEDP2M; i++ )
-        p2m_teardown(d->arch.nested_p2m[i], false);
+    {
+        p2m_teardown(d->arch.nested_p2m[i], false, preempted);
+        if ( preempted && *preempted )
+            return;
+    }
 
-    p2m_teardown(p2m_get_hostp2m(d), false);
+    p2m_teardown(p2m_get_hostp2m(d), false, preempted);
+    if ( preempted && *preempted )
+        return;
 
     paging_lock(d); /* Keep various asserts happy */
 
diff --git a/xen/arch/x86/mm/p2m-basic.c b/xen/arch/x86/mm/p2m-basic.c
index 3231aaa9ba..47b780d6d6 100644
--- a/xen/arch/x86/mm/p2m-basic.c
+++ b/xen/arch/x86/mm/p2m-basic.c
@@ -23,6 +23,7 @@
  * along with this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <xen/event.h>
 #include <xen/types.h>
 #include <asm/p2m.h>
 #include "mm-locks.h"
@@ -154,11 +155,12 @@ int p2m_init(struct domain *d)
  * hvm fixme: when adding support for pvh non-hardware domains, this path must
  * cleanup any foreign p2m types (release refcnts on them).
  */
-void p2m_teardown(struct p2m_domain *p2m, bool remove_root)
+void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted)
 {
 #ifdef CONFIG_HVM
     struct page_info *pg, *root_pg = NULL;
     struct domain *d;
+    unsigned int i = 0;
 
     if ( !p2m )
         return;
@@ -180,8 +182,19 @@ void p2m_teardown(struct p2m_domain *p2m, bool remove_root)
     }
 
     while ( (pg = page_list_remove_head(&p2m->pages)) )
-        if ( pg != root_pg )
-            d->arch.paging.free_page(d, pg);
+    {
+        if ( pg == root_pg )
+            continue;
+
+        d->arch.paging.free_page(d, pg);
+
+        /* Arbitrarily check preemption every 1024 iterations */
+        if ( preempted && !(++i % 1024) && general_preempt_check() )
+        {
+            *preempted = true;
+            break;
+        }
+    }
 
     if ( root_pg )
         page_list_add(root_pg, &p2m->pages);
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 64ca18b393..d985d51614 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -2776,8 +2776,12 @@ int shadow_enable(struct domain *d, u32 mode)
     paging_unlock(d);
  out_unlocked:
 #ifdef CONFIG_HVM
+    /*
+     * This is fine to ignore the preemption here because only the root
+     * will be allocated by p2m_alloc_table().
+     */
     if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) )
-        p2m_teardown(p2m, true);
+        p2m_teardown(p2m, true, NULL);
 #endif
     if ( rv != 0 && pg != NULL )
     {
@@ -2831,7 +2835,9 @@ void shadow_teardown(struct domain *d, bool *preempted)
     for_each_vcpu ( d, v )
         shadow_vcpu_teardown(v);
 
-    p2m_teardown(p2m_get_hostp2m(d), false);
+    p2m_teardown(p2m_get_hostp2m(d), false, preempted);
+    if ( preempted && *preempted )
+        return;
 
     paging_lock(d);
 
@@ -2952,7 +2958,7 @@ void shadow_final_teardown(struct domain *d)
         shadow_teardown(d, NULL);
 
     /* It is now safe to pull down the p2m map. */
-    p2m_teardown(p2m_get_hostp2m(d), true);
+    p2m_teardown(p2m_get_hostp2m(d), true, NULL);
     /* Free any shadow memory that the p2m teardown released */
     paging_lock(d);
     shadow_set_allocation(d, 0, NULL);
--
generated by git-patchbot for /home/xen/git/xen.git#staging



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.