
[Xen-devel] [PATCH 2 of 2] Memory sharing: better handling of ENOMEM while unsharing


  • To: xen-devel@xxxxxxxxxxxxx
  • From: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
  • Date: Mon, 12 Mar 2012 11:29:38 -0400
  • Cc: andres@xxxxxxxxxxxxxx, tim@xxxxxxx, adin@xxxxxxxxxxxxxx
  • Delivery-date: Mon, 12 Mar 2012 15:29:50 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xen.org>

 xen/arch/x86/hvm/hvm.c            |  23 +++++++++++++++-
 xen/arch/x86/mm.c                 |   8 +++--
 xen/arch/x86/mm/mem_sharing.c     |  54 +++++++++++++++++++++++---------------
 xen/arch/x86/mm/p2m.c             |  18 ++++++++++++-
 xen/common/grant_table.c          |  11 ++++---
 xen/common/memory.c               |   1 +
 xen/include/asm-x86/mem_sharing.h |  27 ++++++++++++++++++-
 7 files changed, 110 insertions(+), 32 deletions(-)


If unsharing fails with ENOMEM, we were:
 - leaving the list of gfns backed by the shared page in an inconsistent state
 - cycling forever in the HAP page fault handler
 - attempting to produce a mem event (which could sleep on a wait queue)
   while holding locks
 - not checking, for all callers, that unshare could indeed have failed

Fix the bugs above, and sanitize callers so that they place a ring event in an
unlocked context, or without needing to go to sleep on a wait queue.

A note on the rationale for unshare error handling (an illustrative sketch of
the resulting caller pattern follows this list):
 1. Unshare can only fail with ENOMEM. Any other error condition BUG_ON()s.
 2. We notify a potential dom0 helper through a mem_event ring. But we
    allow the notification to not go to sleep. If the event ring is full
    of ENOMEM warnings, then the helper will already have been kicked enough.
 3. We cannot "just" go to sleep until the unshare is resolved, because we
    might be buried deep in locks (e.g. something -> copy_to_user ->
    __hvm_copy)
 4. So, we make sure we:
    4.1. return an error
    4.2. do not corrupt memory shared with other guests
    4.3. do not corrupt memory private to the current guest
    4.4. do not corrupt the hypervisor's memory sharing metadata
    4.5. let the guest deal with the error, if it propagates that far
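
The sketch below is illustrative only (not part of the patch); it shows the
caller pattern implied by the rationale, using the two interfaces introduced
here, mem_sharing_unshare_page() and mem_sharing_notify_enomem():

    /* Guest vcpu writes to one of its own shared pages, and we may be
     * holding locks, so the notification must not sleep. */
    if ( mem_sharing_unshare_page(d, gfn, 0) < 0 )
    {
        /* Only -ENOMEM can reach this point: the inline wrapper in
         * mem_sharing.h BUG_ON()s any other error code. */
        /* Kick the dom0 helper without sleeping (allow_sleep == 0) and
         * ignore -EBUSY/-ENOSYS; a full ring means the helper has
         * already been kicked enough. */
        (void)mem_sharing_notify_enomem(d, gfn, 0);
        /* Propagate the error so the guest can deal with it. */
        return -ENOMEM;
    }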

Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff -r d56278bd5357 -r dfbcb092aa66 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1233,6 +1233,9 @@ int hvm_hap_nested_page_fault(unsigned l
     struct vcpu *v = current;
     struct p2m_domain *p2m;
     int rc, fall_through = 0, paged = 0;
+#ifdef __x86_64__
+    int sharing_enomem = 0;
+#endif
     mem_event_request_t *req_ptr = NULL;
 
     /* On Nested Virtualization, walk the guest page table.
@@ -1342,7 +1345,8 @@ int hvm_hap_nested_page_fault(unsigned l
     if ( access_w && (p2mt == p2m_ram_shared) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
-        mem_sharing_unshare_page(p2m->domain, gfn, 0);
+        sharing_enomem = 
+            (mem_sharing_unshare_page(p2m->domain, gfn, 0) < 0);
         rc = 1;
         goto out_put_gfn;
     }
@@ -1383,8 +1387,25 @@ int hvm_hap_nested_page_fault(unsigned l
 out_put_gfn:
     put_gfn(p2m->domain, gfn);
 out:
+    /* All of these are delayed until we exit, since we might
+     * sleep on event ring wait queues, and we must not hold
+     * locks in such circumstances. */
     if ( paged )
         p2m_mem_paging_populate(v->domain, gfn);
+#ifdef __x86_64__
+    if ( sharing_enomem )
+    {
+        int rv;
+        if ( (rv = mem_sharing_notify_enomem(v->domain, gfn, 1)) < 0 )
+        {
+            gdprintk(XENLOG_ERR, "Domain %hu attempted to unshare "
+                     "gfn %lx, ENOMEM and no helper (rc %d)\n",
+                     v->domain->domain_id, gfn, rv);
+            /* Crash the domain */
+            rc = 0;
+        }
+    }
+#endif
     if ( req_ptr )
     {
         mem_access_send_req(v->domain, req_ptr);
diff -r d56278bd5357 -r dfbcb092aa66 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3597,12 +3597,14 @@ int do_mmu_update(
                         /* Unshare the page for RW foreign mappings */
                         if ( l1e_get_flags(l1e) & _PAGE_RW )
                         {
-                            rc = mem_sharing_unshare_page(pg_owner, 
-                                                          l1e_get_pfn(l1e), 
-                                                          0);
+                            unsigned long gfn = l1e_get_pfn(l1e);
+                            rc = mem_sharing_unshare_page(pg_owner, gfn, 0); 
                             if ( rc )
                             {
                                 put_gfn(pg_owner, l1egfn);
+                                /* Notify helper, don't care about errors, will not
+                                 * sleep on wq, since we're a foreign domain. */
+                                (void)mem_sharing_notify_enomem(pg_owner, gfn, 0);
                                 break; 
                             }
                         }
diff -r d56278bd5357 -r dfbcb092aa66 xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -344,34 +344,29 @@ int mem_sharing_audit(void)
 #endif
 
 
-static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn)
+int mem_sharing_notify_enomem(struct domain *d, unsigned long gfn,
+                                bool_t allow_sleep) 
 {
     struct vcpu *v = current;
+    int rc;
     mem_event_request_t req = { .gfn = gfn };
 
-    if ( v->domain != d )
+    if ( (rc = __mem_event_claim_slot(d, 
+                        &d->mem_event->share, allow_sleep)) < 0 )
+        return rc;
+
+    if ( v->domain == d )
     {
-        /* XXX This path needs some attention.  For now, just fail foreign 
-         * XXX requests to unshare if there's no memory.  This replaces 
-         * XXX old code that BUG()ed here; the callers now BUG()
-         * XXX elewhere. */
-        gdprintk(XENLOG_ERR, 
-                 "Failed alloc on unshare path for foreign (%d) lookup\n",
-                 d->domain_id);
-        return;
+        req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
+        vcpu_pause_nosync(v);
     }
 
-    if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
-    {
-        return;
-    }
-
-    req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
-    vcpu_pause_nosync(v);
-
     req.p2mt = p2m_ram_shared;
     req.vcpu_id = v->vcpu_id;
+
     mem_event_put_request(d, &d->mem_event->share, &req);
+
+    return 0;
 }
 
 unsigned int mem_sharing_get_nr_saved_mfns(void)
@@ -903,7 +898,21 @@ err_out:
     return ret;
 }
 
-int mem_sharing_unshare_page(struct domain *d,
+
+/* A note on the rationale for unshare error handling:
+ *  1. Unshare can only fail with ENOMEM. Any other error condition BUG_ON()s
+ *  2. We notify a potential dom0 helper through a mem_event ring. But we
+ *     allow the notification to not go to sleep. If the event ring is full
+ *     of ENOMEM warnings, then the helper is already on the ball.
+ *  3. We cannot go to sleep until the unshare is resolved, because we might
+ *     be buried deep in locks (e.g. something -> copy_to_user -> __hvm_copy)
+ *  4. So, we make sure we:
+ *     4.1. return an error
+ *     4.2. do not corrupt shared memory
+ *     4.3. do not corrupt guest memory
+ *     4.4. let the guest deal with it if the error propagation will reach it
+ */
+int __mem_sharing_unshare_page(struct domain *d,
                              unsigned long gfn, 
                              uint16_t flags)
 {
@@ -945,7 +954,6 @@ gfn_found:
     /* Do the accounting first. If anything fails below, we have bigger
      * bigger fish to fry. First, remove the gfn from the list. */ 
     last_gfn = list_has_one_entry(&page->sharing->gfns);
-    mem_sharing_gfn_destroy(d, gfn_info);
     if ( last_gfn )
     {
         /* Clean up shared state */
@@ -959,6 +967,7 @@ gfn_found:
      * (possibly freeing the page), and exit early */
     if ( flags & MEM_SHARING_DESTROY_GFN )
     {
+        mem_sharing_gfn_destroy(d, gfn_info);
         put_page_and_type(page);
         mem_sharing_page_unlock(page);
         if ( last_gfn && 
@@ -971,6 +980,7 @@ gfn_found:
  
     if ( last_gfn )
     {
+        mem_sharing_gfn_destroy(d, gfn_info);
         /* Making a page private atomically unlocks it */
         BUG_ON(page_make_private(d, page) != 0);
         goto private_page_found;
@@ -982,7 +992,8 @@ gfn_found:
     {
         mem_sharing_page_unlock(old_page);
         put_gfn(d, gfn);
-        mem_sharing_notify_helper(d, gfn);
+        /* Caller is responsible for placing an event
+         * in the ring */
         return -ENOMEM;
     }
 
@@ -993,6 +1004,7 @@ gfn_found:
     unmap_domain_page(t);
 
     BUG_ON(set_shared_p2m_entry(d, gfn, page_to_mfn(page)) == 0);
+    mem_sharing_gfn_destroy(d, gfn_info);
     mem_sharing_page_unlock(old_page);
     put_page_and_type(old_page);
 
diff -r d56278bd5357 -r dfbcb092aa66 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -170,7 +170,10 @@ mfn_t __get_gfn_type_access(struct p2m_d
     if ( q == p2m_unshare && p2m_is_shared(*t) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
-        mem_sharing_unshare_page(p2m->domain, gfn, 0);
+        /* Try to unshare. If we fail, communicate ENOMEM without
+         * sleeping. */
+        if ( mem_sharing_unshare_page(p2m->domain, gfn, 0) < 0 )
+            (void)mem_sharing_notify_enomem(p2m->domain, gfn, 0);
         mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order);
     }
 #endif
@@ -371,6 +374,7 @@ void p2m_teardown(struct p2m_domain *p2m
         if ( mfn_valid(mfn) && (t == p2m_ram_shared) )
         {
             ASSERT(!p2m_is_nestedp2m(p2m));
+            /* Does not fail with ENOMEM given the DESTROY flag */
             BUG_ON(mem_sharing_unshare_page(d, gfn, MEM_SHARING_DESTROY_GFN));
         }
     }
@@ -510,6 +514,18 @@ guest_physmap_add_entry(struct domain *d
             if ( rc )
             {
                 p2m_unlock(p2m);
+                /* NOTE: Should a guest domain bring this upon itself,
+                 * there is not a whole lot we can do. We are buried
+                 * deep in locks from most code paths by now. So, fail
+                 * the call and don't try to sleep on a wait queue
+                 * while placing the mem event.
+                 *
+                 * However, all current (changeset 3432abcf9380) code
+                 * paths avoid this unsavoury situation. For now.
+                 *
+                 * Foreign domains are okay to place an event as they 
+                 * won't go to sleep. */
+                (void)mem_sharing_notify_enomem(p2m->domain, gfn + i, 0);
                 return rc;
             }
             omfn = p2m->get_entry(p2m, gfn + i, &ot, &a, p2m_query, NULL);
diff -r d56278bd5357 -r dfbcb092aa66 xen/common/grant_table.c
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -112,8 +112,7 @@ static unsigned inline int max_nr_maptra
     p2m_type_t __p2mt;                                      \
     unsigned long __x;                                      \
     __x = mfn_x(get_gfn_unshare((_d), (_gfn), &__p2mt));    \
-    BUG_ON(p2m_is_shared(__p2mt)); /* XXX fixme */          \
-    if ( !p2m_is_valid(__p2mt) )                            \
+    if ( p2m_is_shared(__p2mt) || !p2m_is_valid(__p2mt) )   \
         __x = INVALID_MFN;                                  \
     __x; })
 #else
@@ -155,9 +154,11 @@ static int __get_paged_frame(unsigned lo
     else
     {
         mfn = get_gfn_unshare(rd, gfn, &p2mt);
-        BUG_ON(p2m_is_shared(p2mt));
-        /* XXX Here, and above in gfn_to_mfn_private, need to handle
-         * XXX failure to unshare. */
+        if ( p2m_is_shared(p2mt) )
+        {
+            put_gfn(rd, gfn);
+            return GNTST_eagain;
+        }
     }
 
     if ( p2m_is_valid(p2mt) ) {
diff -r d56278bd5357 -r dfbcb092aa66 xen/common/memory.c
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -200,6 +200,7 @@ int guest_remove_page(struct domain *d, 
         if ( mem_sharing_unshare_page(d, gmfn, 0) )
         {
             put_gfn(d, gmfn);
+            (void)mem_sharing_notify_enomem(d, gmfn, 0);
             return 0;
         }
         /* Maybe the mfn changed */
diff -r d56278bd5357 -r dfbcb092aa66 xen/include/asm-x86/mem_sharing.h
--- a/xen/include/asm-x86/mem_sharing.h
+++ b/xen/include/asm-x86/mem_sharing.h
@@ -52,10 +52,35 @@ int mem_sharing_nominate_page(struct dom
                               unsigned long gfn,
                               int expected_refcnt,
                               shr_handle_t *phandle);
+
 #define MEM_SHARING_DESTROY_GFN       (1<<1)
-int mem_sharing_unshare_page(struct domain *d, 
+/* Only fails with -ENOMEM. Enforce it with a BUG_ON wrapper. */
+int __mem_sharing_unshare_page(struct domain *d,
                              unsigned long gfn, 
                              uint16_t flags);
+static inline int mem_sharing_unshare_page(struct domain *d,
+                                           unsigned long gfn,
+                                           uint16_t flags)
+{
+    int rc = __mem_sharing_unshare_page(d, gfn, flags);
+    BUG_ON( rc && (rc != -ENOMEM) );
+    return rc;
+}
+
+/* If called by a foreign domain, possible errors are
+ *   -EBUSY -> ring full
+ *   -ENOSYS -> no ring to begin with
+ * and the foreign mapper is responsible for retrying.
+ *
+ * If called by the guest vcpu itself and allow_sleep is set, it may
+ * sleep on a wait queue, so the caller is responsible for not
+ * holding locks on entry. It may only fail with -ENOSYS.
+ *
+ * If called by the guest vcpu itself and allow_sleep is not set,
+ * then it behaves the same as for a foreign domain.
+ */
+int mem_sharing_notify_enomem(struct domain *d, unsigned long gfn,
+                                bool_t allow_sleep);
 int mem_sharing_sharing_resume(struct domain *d);
 int mem_sharing_memop(struct domain *d, 
                        xen_mem_sharing_op_t *mec);
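
A further illustrative sketch (again, not part of the patch) of the foreign
mapper side, per the header comment above: the notification never sleeps for
a foreign domain, and retrying is the mapper's responsibility:

    /* Foreign domain unsharing a page it wants to map RW. */
    rc = mem_sharing_unshare_page(pg_owner, gfn, 0);
    if ( rc == -ENOMEM )
    {
        /* With allow_sleep == 0 this cannot sleep; it may return
         * -EBUSY (ring full) or -ENOSYS (no ring), both of which we
         * ignore here. The mapper backs off and retries the whole
         * operation, as the grant table path does via GNTST_eagain. */
        (void)mem_sharing_notify_enomem(pg_owner, gfn, 0);
    }
    return rc;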

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

