[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v17 1/2] mem_sharing: fix sharability check during fork reset



When resetting a VM fork we ought to only remove pages that were allocated for
the fork during it's execution and the contents copied over from the parent.
This can be determined if the page is sharable as special pages used by the
fork for other purposes will not pass this test. Unfortunately during the fork
reset loop we only partially check whether that's the case. A page's type may
indicate it is sharable (pass p2m_is_sharable) but that's not a sufficient
check by itself. All checks that are normally performed before a page is
converted to the sharable type need to be performed to avoid removing pages
from the p2m that may be used for other purposes. For example, currently the
reset loop also removes the vcpu info pages from the p2m, potentially putting
the guest into infinite page-fault loops.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@xxxxxxxxx>
---
v17: Changes based on feedback from Roger
---
 xen/arch/x86/mm/mem_sharing.c | 83 ++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 36 deletions(-)

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index bb74595351..344a5bfb3d 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -633,31 +633,33 @@ unsigned int mem_sharing_get_nr_shared_mfns(void)
 /* Functions that change a page's type and ownership */
 static int page_make_sharable(struct domain *d,
                               struct page_info *page,
-                              int expected_refcnt)
+                              unsigned int expected_refcnt,
+                              bool validate_only)
 {
-    bool_t drop_dom_ref;
+    int rc = 0;
+    bool drop_dom_ref = false;
 
-    spin_lock(&d->page_alloc_lock);
+    spin_lock_recursive(&d->page_alloc_lock);
 
     if ( d->is_dying )
     {
-        spin_unlock(&d->page_alloc_lock);
-        return -EBUSY;
+        rc = -EBUSY;
+        goto out;
     }
 
     /* Change page type and count atomically */
     if ( !get_page_and_type(page, d, PGT_shared_page) )
     {
-        spin_unlock(&d->page_alloc_lock);
-        return -EINVAL;
+        rc = -EINVAL;
+        goto out;
     }
 
     /* Check it wasn't already sharable and undo if it was */
     if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
     {
-        spin_unlock(&d->page_alloc_lock);
         put_page_and_type(page);
-        return -EEXIST;
+        rc = -EEXIST;
+        goto out;
     }
 
     /*
@@ -666,20 +668,26 @@ static int page_make_sharable(struct domain *d,
      */
     if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
     {
-        spin_unlock(&d->page_alloc_lock);
         /* Return type count back to zero */
         put_page_and_type(page);
-        return -E2BIG;
+        rc = -E2BIG;
+        goto out;
     }
 
-    page_set_owner(page, dom_cow);
-    drop_dom_ref = !domain_adjust_tot_pages(d, -1);
-    page_list_del(page, &d->page_list);
-    spin_unlock(&d->page_alloc_lock);
+    if ( !validate_only )
+    {
+        page_set_owner(page, dom_cow);
+        drop_dom_ref = !domain_adjust_tot_pages(d, -1);
+        page_list_del(page, &d->page_list);
+    }
+
+out:
+    spin_unlock_recursive(&d->page_alloc_lock);
 
     if ( drop_dom_ref )
         put_domain(d);
-    return 0;
+
+    return rc;
 }
 
 static int page_make_private(struct domain *d, struct page_info *page)
@@ -810,7 +818,8 @@ static int debug_gref(struct domain *d, grant_ref_t ref)
 }
 
 static int nominate_page(struct domain *d, gfn_t gfn,
-                         int expected_refcnt, shr_handle_t *phandle)
+                         unsigned int expected_refcnt, bool validate_only,
+                         shr_handle_t *phandle)
 {
     struct p2m_domain *hp2m = p2m_get_hostp2m(d);
     p2m_type_t p2mt;
@@ -879,8 +888,8 @@ static int nominate_page(struct domain *d, gfn_t gfn,
     }
 
     /* Try to convert the mfn to the sharable type */
-    ret = page_make_sharable(d, page, expected_refcnt);
-    if ( ret )
+    ret = page_make_sharable(d, page, expected_refcnt, validate_only);
+    if ( ret || validate_only )
         goto out;
 
     /*
@@ -1392,13 +1401,13 @@ static int range_share(struct domain *d, struct domain 
*cd,
          * We only break out if we run out of memory as individual pages may
          * legitimately be unsharable and we just want to skip over those.
          */
-        rc = nominate_page(d, _gfn(start), 0, &sh);
+        rc = nominate_page(d, _gfn(start), 0, false, &sh);
         if ( rc == -ENOMEM )
             break;
 
         if ( !rc )
         {
-            rc = nominate_page(cd, _gfn(start), 0, &ch);
+            rc = nominate_page(cd, _gfn(start), 0, false, &ch);
             if ( rc == -ENOMEM )
                 break;
 
@@ -1478,7 +1487,7 @@ int mem_sharing_fork_page(struct domain *d, gfn_t gfn, 
bool unsharing)
         /* For read-only accesses we just add a shared entry to the physmap */
         while ( parent )
         {
-            if ( !(rc = nominate_page(parent, gfn, 0, &handle)) )
+            if ( !(rc = nominate_page(parent, gfn, 0, false, &handle)) )
                 break;
 
             parent = parent->parent;
@@ -1775,20 +1784,22 @@ static int mem_sharing_fork_reset(struct domain *d, 
struct domain *pd)
     spin_lock_recursive(&d->page_alloc_lock);
     page_list_for_each_safe(page, tmp, &d->page_list)
     {
-        p2m_type_t p2mt;
-        p2m_access_t p2ma;
+        shr_handle_t sh;
         mfn_t mfn = page_to_mfn(page);
         gfn_t gfn = mfn_to_gfn(d, mfn);
 
-        mfn = __get_gfn_type_access(p2m, gfn_x(gfn), &p2mt, &p2ma,
-                                    0, NULL, false);
-
-        /* only reset pages that are sharable */
-        if ( !p2m_is_sharable(p2mt) )
-            continue;
-
-        /* take an extra reference or just skip if can't for whatever reason */
-        if ( !get_page(page, d) )
+        /*
+         * We only want to remove pages from the fork here that were copied
+         * from the parent but could be potentially re-populated using memory
+         * sharing after the reset. These pages all must be regular pages with
+         * no extra reference held to them, thus should be possible to make
+         * them sharable. Unfortunately p2m_is_sharable check is not sufficient
+         * to test this as it doesn't check the page's reference count. We thus
+         * check whether the page is convertable to the shared type using
+         * nominate_page. In case the page is already shared (ie. a share
+         * handle is returned) then we don't remove it.
+         */
+        if ( (rc = nominate_page(d, gfn, 0, true, &sh)) || sh )
             continue;
 
         /* forked memory is 4k, not splitting large pages so this must work */
@@ -1797,7 +1808,7 @@ static int mem_sharing_fork_reset(struct domain *d, 
struct domain *pd)
         ASSERT(!rc);
 
         put_page_alloc_ref(page);
-        put_page(page);
+        put_page_and_type(page);
     }
     spin_unlock_recursive(&d->page_alloc_lock);
 
@@ -1839,7 +1850,7 @@ int 
mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
     {
         shr_handle_t handle;
 
-        rc = nominate_page(d, _gfn(mso.u.nominate.u.gfn), 0, &handle);
+        rc = nominate_page(d, _gfn(mso.u.nominate.u.gfn), 0, false, &handle);
         mso.u.nominate.handle = handle;
     }
     break;
@@ -1854,7 +1865,7 @@ int 
mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
         if ( rc < 0 )
             goto out;
 
-        rc = nominate_page(d, gfn, 3, &handle);
+        rc = nominate_page(d, gfn, 3, false, &handle);
         mso.u.nominate.handle = handle;
     }
     break;
-- 
2.20.1




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.