
[xen staging] x86/mem_sharing: resolve mm-lock order violations when forking VMs with nested p2m



commit 3e6c560ea1931ff579fc2c9504a6e46e5c4572c9
Author:     Tamas K Lengyel <tamas.lengyel@xxxxxxxxx>
AuthorDate: Fri Jan 8 11:51:36 2021 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Fri Jan 8 11:51:36 2021 +0100

    x86/mem_sharing: resolve mm-lock order violations when forking VMs with nested p2m
    
    Several lock-order violations have been encountered while attempting to fork
    VMs with nestedhvm=1 set. This patch resolves the issues.
    
    The order violations stem from a call to p2m_flush_nestedp2m being
    performed whenever the hostp2m changes. This function always takes the
    p2m lock for the nested_p2m. However, with sharing, the p2m locks always
    have to be taken before the sharing lock. To resolve this we avoid taking
    the sharing lock where possible (it was actually unnecessary to begin
    with), and we also make p2m_flush_nestedp2m aware that the p2m lock may
    already be held, preemptively taking all nested_p2m locks before
    unsharing a page where taking the sharing lock is necessary.
    
    Signed-off-by: Tamas K Lengyel <tamas.lengyel@xxxxxxxxx>
    Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
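
As context for the ordering rule being restored: the p2m locks must always be
taken before the sharing lock, on every path. Below is a minimal,
self-contained C sketch of that discipline, using pthread mutexes and
illustrative names (MAX_NESTED, nested_lock, sharing_lock) rather than Xen's
actual lock primitives; it is not the hypervisor code, only the pattern.

#include <pthread.h>
#include <stdio.h>

#define MAX_NESTED 4   /* illustrative stand-in for MAX_NESTEDP2M */

/*
 * Two kinds of locks with a fixed ordering rule, standing in for the
 * nested p2m locks (outer) and the mem_sharing lock (inner).  Deadlock
 * is avoided as long as every path honours the same order.
 */
static pthread_mutex_t nested_lock[MAX_NESTED] = {
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};
static pthread_mutex_t sharing_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Unshare path after the fix: every nested lock is taken up front, so
 * taking the sharing lock afterwards can no longer invert the order
 * relative to a concurrent flush.
 */
static void unshare_sketch(void)
{
    unsigned int i;

    for ( i = 0; i < MAX_NESTED; i++ )
        pthread_mutex_lock(&nested_lock[i]);

    pthread_mutex_lock(&sharing_lock);
    /* ... unshare work ... */
    pthread_mutex_unlock(&sharing_lock);

    for ( i = 0; i < MAX_NESTED; i++ )
        pthread_mutex_unlock(&nested_lock[i]);
}

/*
 * Flush path: it only ever needs the nested locks and takes them in the
 * same order, so it cannot deadlock against unshare_sketch().
 */
static void flush_sketch(void)
{
    unsigned int i;

    for ( i = 0; i < MAX_NESTED; i++ )
    {
        pthread_mutex_lock(&nested_lock[i]);
        /* ... flush work ... */
        pthread_mutex_unlock(&nested_lock[i]);
    }
}

int main(void)
{
    unshare_sketch();
    flush_sketch();
    puts("lock order respected");
    return 0;
}

Since both paths honour the same order, neither can end up holding the
sharing lock while waiting for a nested lock the other already owns.
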
---
 xen/arch/x86/mm/mem_sharing.c | 44 +++++++++++++++++++++++++++++--------------
 xen/arch/x86/mm/p2m.c         | 12 ++++++++++--
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index ad9d495110..a98a1709c2 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -39,6 +39,7 @@
 #include <asm/event.h>
 #include <asm/hap.h>
 #include <asm/hvm/hvm.h>
+#include <asm/hvm/nestedhvm.h>
 #include <xsm/xsm.h>
 
 #include <public/hvm/params.h>
@@ -893,13 +894,11 @@ static int nominate_page(struct domain *d, gfn_t gfn,
         goto out;
 
     /*
-     * Now that the page is validated, we can lock it. There is no
-     * race because we're holding the p2m entry, so no one else
-     * could be nominating this gfn.
+     * Now that the page is validated, we can make it shared. There is no race
+     * because we're holding the p2m entry, so no one else could be nominating
+     * this gfn, and it is evidently not yet shared with any other VM, thus we
+     * don't need to take the mem_sharing_page_lock here.
      */
-    ret = -ENOENT;
-    if ( !mem_sharing_page_lock(page) )
-        goto out;
 
     /* Initialize the shared state */
     ret = -ENOMEM;
@@ -935,7 +934,6 @@ static int nominate_page(struct domain *d, gfn_t gfn,
 
     *phandle = page->sharing->handle;
     audit_add_list(page);
-    mem_sharing_page_unlock(page);
     ret = 0;
 
 out:
@@ -1214,7 +1212,8 @@ int __mem_sharing_unshare_page(struct domain *d,
     p2m_type_t p2mt;
     mfn_t mfn;
     struct page_info *page, *old_page;
-    int last_gfn;
+    bool last_gfn;
+    int rc = 0;
     gfn_info_t *gfn_info = NULL;
 
     mfn = get_gfn(d, gfn, &p2mt);
@@ -1226,6 +1225,15 @@ int __mem_sharing_unshare_page(struct domain *d,
         return 0;
     }
 
+    /* lock nested p2ms to avoid lock-order violation with sharing lock */
+    if ( unlikely(nestedhvm_enabled(d)) )
+    {
+        unsigned int i;
+
+        for ( i = 0; i < MAX_NESTEDP2M; i++ )
+            p2m_lock(d->arch.nested_p2m[i]);
+    }
+
     page = __grab_shared_page(mfn);
     if ( page == NULL )
     {
@@ -1276,9 +1284,7 @@ int __mem_sharing_unshare_page(struct domain *d,
             put_page_alloc_ref(page);
 
         put_page_and_type(page);
-        put_gfn(d, gfn);
-
-        return 0;
+        goto out;
     }
 
     if ( last_gfn )
@@ -1295,12 +1301,12 @@ int __mem_sharing_unshare_page(struct domain *d,
         /* Undo dec of nr_saved_mfns, as the retry will decrease again. */
         atomic_inc(&nr_saved_mfns);
         mem_sharing_page_unlock(old_page);
-        put_gfn(d, gfn);
         /*
          * Caller is responsible for placing an event
          * in the ring.
          */
-        return -ENOMEM;
+        rc = -ENOMEM;
+        goto out;
     }
 
     copy_domain_page(page_to_mfn(page), page_to_mfn(old_page));
@@ -1327,8 +1333,18 @@ int __mem_sharing_unshare_page(struct domain *d,
      */
     paging_mark_dirty(d, page_to_mfn(page));
     /* We do not need to unlock a private page */
+
+ out:
+    if ( unlikely(nestedhvm_enabled(d)) )
+    {
+        unsigned int i;
+
+        for ( i = 0; i < MAX_NESTEDP2M; i++ )
+            p2m_unlock(d->arch.nested_p2m[i]);
+    }
+
     put_gfn(d, gfn);
-    return 0;
+    return rc;
 }
 
 int relinquish_shared_pages(struct domain *d)
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index ad4bb94514..a32301c343 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1597,9 +1597,17 @@ p2m_flush(struct vcpu *v, struct p2m_domain *p2m)
 void
 p2m_flush_nestedp2m(struct domain *d)
 {
-    int i;
+    unsigned int i;
+
     for ( i = 0; i < MAX_NESTEDP2M; i++ )
-        p2m_flush_table(d->arch.nested_p2m[i]);
+    {
+        struct p2m_domain *p2m = d->arch.nested_p2m[i];
+
+        if ( p2m_locked_by_me(p2m) )
+            p2m_flush_table_locked(p2m);
+        else
+            p2m_flush_table(p2m);
+    }
 }
 
 void np2m_flush_base(struct vcpu *v, unsigned long np2m_base)
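
The p2m.c hunk above makes the nested flush tolerant of locks the current
context already holds. A minimal, self-contained sketch of that pattern
follows; owned_lock, flush_table and flush_table_locked are illustrative
stand-ins mirroring the roles of p2m_locked_by_me(), p2m_flush_table() and
p2m_flush_table_locked(), not Xen's real lock implementation.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NLOCKS 4   /* illustrative stand-in for MAX_NESTEDP2M */

/*
 * A lock that remembers its owner, so a caller can ask "do I already
 * hold this?" in the spirit of p2m_locked_by_me().
 */
struct owned_lock {
    pthread_mutex_t mtx;
    pthread_t owner;
    bool held;
};

static bool locked_by_me(const struct owned_lock *l)
{
    return l->held && pthread_equal(l->owner, pthread_self());
}

static void lock_it(struct owned_lock *l)
{
    pthread_mutex_lock(&l->mtx);
    l->owner = pthread_self();
    l->held = true;
}

static void unlock_it(struct owned_lock *l)
{
    l->held = false;
    pthread_mutex_unlock(&l->mtx);
}

/* Flush body that expects the lock to already be held on entry. */
static void flush_table_locked(struct owned_lock *l)
{
    (void)l;
    /* ... flush work ... */
}

/* Flush that takes and releases the lock itself. */
static void flush_table(struct owned_lock *l)
{
    lock_it(l);
    flush_table_locked(l);
    unlock_it(l);
}

/*
 * Mirrors the reworked p2m_flush_nestedp2m(): use the locked variant for
 * any lock this context already holds, otherwise the self-locking one.
 */
static void flush_all(struct owned_lock *locks, unsigned int n)
{
    unsigned int i;

    for ( i = 0; i < n; i++ )
    {
        if ( locked_by_me(&locks[i]) )
            flush_table_locked(&locks[i]);
        else
            flush_table(&locks[i]);
    }
}

int main(void)
{
    struct owned_lock locks[NLOCKS];
    unsigned int i;

    for ( i = 0; i < NLOCKS; i++ )
    {
        pthread_mutex_init(&locks[i].mtx, NULL);
        locks[i].held = false;
    }

    /* Caller that pre-took every lock, as the unshare path now does. */
    for ( i = 0; i < NLOCKS; i++ )
        lock_it(&locks[i]);
    flush_all(locks, NLOCKS);          /* no re-acquisition attempted */
    for ( i = 0; i < NLOCKS; i++ )
        unlock_it(&locks[i]);

    /* Caller holding nothing: flush_all() locks and unlocks as it goes. */
    flush_all(locks, NLOCKS);

    puts("flush pattern ok");
    return 0;
}

A caller that has pre-taken all the locks, as the reworked unshare path does,
can therefore invoke the flush without self-deadlock or an order inversion.
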
--
generated by git-patchbot for /home/xen/git/xen.git#staging