
[Xen-devel] [PATCH 4 of 7] Re-order calls to put_gfn() around wait queue invocations



 xen/arch/x86/hvm/emulate.c       |   2 +-
 xen/arch/x86/hvm/hvm.c           |  25 +++++++++++++------
 xen/arch/x86/mm.c                |  10 +++---
 xen/arch/x86/mm/guest_walk.c     |   2 +-
 xen/arch/x86/mm/hap/guest_walk.c |   6 +---
 xen/arch/x86/mm/mem_access.c     |  10 +++++++
 xen/arch/x86/mm/mem_event.c      |   5 +++
 xen/arch/x86/mm/p2m.c            |  52 ++++++++++++++++++++++-----------------
 xen/common/grant_table.c         |   2 +-
 xen/common/memory.c              |   2 +-
 xen/include/asm-x86/mem_access.h |   1 +
 xen/include/asm-x86/mem_event.h  |   3 ++
 xen/include/asm-x86/p2m.h        |  10 +++++--
 13 files changed, 83 insertions(+), 47 deletions(-)


Since we use wait queues to handle potential ring congestion, code paths
that try to generate a mem event while holding a gfn lock may go to sleep
in non-preemptible mode.

Most such code paths can be fixed by simply postponing event generation until
locks are released.

Signed-off-by: Adin Scannell <adin@xxxxxxxxxxx>
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
Acked-by: Tim Deegan <tim@xxxxxxx>
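
To make the re-ordering concrete, the recurring pattern in the hunks below
looks roughly like this sketch (illustration only, not part of the patch;
the names are taken from the hunks). The gfn reference is dropped first, so
that p2m_mem_paging_populate() may block the vcpu on a wait queue without
holding the gfn lock:

    /* Illustration only -- drop the gfn reference before any call that may
     * sleep on a mem_event wait queue. */
    mfn = get_gfn_unshare(d, gfn, &p2mt);
    if ( p2m_is_paging(p2mt) )
    {
        put_gfn(d, gfn);                  /* release the gfn reference first */
        p2m_mem_paging_populate(d, gfn);  /* may now sleep safely            */
        return -ENOENT;                   /* caller retries once populated   */
    }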

diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -67,8 +67,8 @@ static int hvmemul_do_io(
     ram_mfn = get_gfn_unshare(curr->domain, ram_gfn, &p2mt);
     if ( p2m_is_paging(p2mt) )
     {
+        put_gfn(curr->domain, ram_gfn); 
         p2m_mem_paging_populate(curr->domain, ram_gfn);
-        put_gfn(curr->domain, ram_gfn); 
         return X86EMUL_RETRY;
     }
     if ( p2m_is_shared(p2mt) )
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -64,6 +64,7 @@
 #include <public/version.h>
 #include <public/memory.h>
 #include <asm/mem_event.h>
+#include <asm/mem_access.h>
 #include <public/mem_event.h>
 
 bool_t __read_mostly hvm_enabled;
@@ -363,8 +364,8 @@ static int hvm_set_ioreq_page(
     }
     if ( p2m_is_paging(p2mt) )
     {
+        put_gfn(d, gmfn);
         p2m_mem_paging_populate(d, gmfn);
-        put_gfn(d, gmfn);
         return -ENOENT;
     }
     if ( p2m_is_shared(p2mt) )
@@ -1195,7 +1196,8 @@ int hvm_hap_nested_page_fault(unsigned l
     mfn_t mfn;
     struct vcpu *v = current;
     struct p2m_domain *p2m;
-    int rc, fall_through = 0;
+    int rc, fall_through = 0, paged = 0;
+    mem_event_request_t *req_ptr = NULL;
 
     /* On Nested Virtualization, walk the guest page table.
      * If this succeeds, all is fine.
@@ -1270,7 +1272,7 @@ int hvm_hap_nested_page_fault(unsigned l
         if ( violation )
         {
             if ( p2m_mem_access_check(gpa, gla_valid, gla, access_r, 
-                                        access_w, access_x) )
+                                        access_w, access_x, &req_ptr) )
             {
                 fall_through = 1;
             } else {
@@ -1297,7 +1299,7 @@ int hvm_hap_nested_page_fault(unsigned l
 #ifdef __x86_64__
     /* Check if the page has been paged out */
     if ( p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_out) )
-        p2m_mem_paging_populate(v->domain, gfn);
+        paged = 1;
 
     /* Mem sharing: unshare the page and try again */
     if ( access_w && (p2mt == p2m_ram_shared) )
@@ -1343,6 +1345,13 @@ int hvm_hap_nested_page_fault(unsigned l
 
 out_put_gfn:
     put_gfn(p2m->domain, gfn);
+    if ( paged )
+        p2m_mem_paging_populate(v->domain, gfn);
+    if ( req_ptr )
+    {
+        mem_access_send_req(v->domain, req_ptr);
+        xfree(req_ptr);
+    }
     return rc;
 }
 
@@ -1849,8 +1858,8 @@ static void *__hvm_map_guest_frame(unsig
     }
     if ( p2m_is_paging(p2mt) )
     {
+        put_gfn(d, gfn);
         p2m_mem_paging_populate(d, gfn);
-        put_gfn(d, gfn);
         return NULL;
     }
 
@@ -2325,8 +2334,8 @@ static enum hvm_copy_result __hvm_copy(
 
         if ( p2m_is_paging(p2mt) )
         {
+            put_gfn(curr->domain, gfn);
             p2m_mem_paging_populate(curr->domain, gfn);
-            put_gfn(curr->domain, gfn);
             return HVMCOPY_gfn_paged_out;
         }
         if ( p2m_is_shared(p2mt) )
@@ -3923,8 +3932,8 @@ long do_hvm_op(unsigned long op, XEN_GUE
             mfn_t mfn = get_gfn_unshare(d, pfn, &t);
             if ( p2m_is_paging(t) )
             {
+                put_gfn(d, pfn);
                 p2m_mem_paging_populate(d, pfn);
-                put_gfn(d, pfn);
                 rc = -EINVAL;
                 goto param_fail3;
             }
@@ -4040,8 +4049,8 @@ long do_hvm_op(unsigned long op, XEN_GUE
             mfn = get_gfn_unshare(d, pfn, &t);
             if ( p2m_is_paging(t) )
             {
+                put_gfn(d, pfn);
                 p2m_mem_paging_populate(d, pfn);
-                put_gfn(d, pfn);
                 rc = -EINVAL;
                 goto param_fail4;
             }
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3536,8 +3536,8 @@ int do_mmu_update(
 
             if ( p2m_is_paged(p2mt) )
             {
+                put_gfn(pt_owner, gmfn);
                 p2m_mem_paging_populate(pg_owner, gmfn);
-                put_gfn(pt_owner, gmfn);
                 rc = -ENOENT;
                 break;
             }
@@ -3568,8 +3568,8 @@ int do_mmu_update(
 
                     if ( p2m_is_paged(l1e_p2mt) )
                     {
+                        put_gfn(pg_owner, l1egfn);
                         p2m_mem_paging_populate(pg_owner, l1e_get_pfn(l1e));
-                        put_gfn(pg_owner, l1egfn);
                         rc = -ENOENT;
                         break;
                     }
@@ -3617,8 +3617,8 @@ int do_mmu_update(
 
                     if ( p2m_is_paged(l2e_p2mt) )
                     {
+                        put_gfn(pg_owner, l2egfn);
                         p2m_mem_paging_populate(pg_owner, l2egfn);
-                        put_gfn(pg_owner, l2egfn);
                         rc = -ENOENT;
                         break;
                     }
@@ -3652,8 +3652,8 @@ int do_mmu_update(
 
                     if ( p2m_is_paged(l3e_p2mt) )
                     {
+                        put_gfn(pg_owner, l3egfn);
                         p2m_mem_paging_populate(pg_owner, l3egfn);
-                        put_gfn(pg_owner, l3egfn);
                         rc = -ENOENT;
                         break;
                     }
@@ -3687,8 +3687,8 @@ int do_mmu_update(
 
                     if ( p2m_is_paged(l4e_p2mt) )
                     {
+                        put_gfn(pg_owner, l4egfn);
                         p2m_mem_paging_populate(pg_owner, l4egfn);
-                        put_gfn(pg_owner, l4egfn);
                         rc = -ENOENT;
                         break;
                     }
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c
+++ b/xen/arch/x86/mm/guest_walk.c
@@ -102,8 +102,8 @@ static inline void *map_domain_gfn(struc
     if ( p2m_is_paging(*p2mt) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
+        __put_gfn(p2m, gfn_x(gfn));
         p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
-        __put_gfn(p2m, gfn_x(gfn));
         *rc = _PAGE_PAGED;
         return NULL;
     }
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c
+++ b/xen/arch/x86/mm/hap/guest_walk.c
@@ -64,10 +64,9 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA
     if ( p2m_is_paging(p2mt) )
     {
         ASSERT(!p2m_is_nestedp2m(p2m));
-        p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT);
-
         pfec[0] = PFEC_page_paged;
         __put_gfn(p2m, top_gfn);
+        p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT);
         return INVALID_GFN;
     }
     if ( p2m_is_shared(p2mt) )
@@ -101,10 +100,9 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA
         if ( p2m_is_paging(p2mt) )
         {
             ASSERT(!p2m_is_nestedp2m(p2m));
-            p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
-
             pfec[0] = PFEC_page_paged;
             __put_gfn(p2m, gfn_x(gfn));
+            p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
             return INVALID_GFN;
         }
         if ( p2m_is_shared(p2mt) )
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/mm/mem_access.c
--- a/xen/arch/x86/mm/mem_access.c
+++ b/xen/arch/x86/mm/mem_access.c
@@ -47,6 +47,16 @@ int mem_access_domctl(struct domain *d, 
     return rc;
 }
 
+int mem_access_send_req(struct domain *d, mem_event_request_t *req)
+{
+    int rc = mem_event_claim_slot(d, &d->mem_event->access);
+    if ( rc < 0 )
+        return rc;
+
+    mem_event_put_request(d, &d->mem_event->access, req);
+
+    return 0;
+} 
 
 /*
  * Local variables:
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -423,6 +423,11 @@ static int mem_event_wait_slot(struct me
     return rc;
 }
 
+bool_t mem_event_check_ring(struct mem_event_domain *med)
+{
+    return (med->ring_page != NULL);
+}
+
 /*
  * Determines whether or not the current vCPU belongs to the target domain,
  * and calls the appropriate wait function.  If it is a guest vCPU, then we
diff -r 5416a3b371f2 -r f2d967332acc xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1130,17 +1130,18 @@ void p2m_mem_paging_resume(struct domain
 }
 
 bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
-                          bool_t access_r, bool_t access_w, bool_t access_x)
+                          bool_t access_r, bool_t access_w, bool_t access_x,
+                          mem_event_request_t **req_ptr)
 {
     struct vcpu *v = current;
-    mem_event_request_t req;
     unsigned long gfn = gpa >> PAGE_SHIFT;
     struct domain *d = v->domain;    
     struct p2m_domain* p2m = p2m_get_hostp2m(d);
     mfn_t mfn;
     p2m_type_t p2mt;
     p2m_access_t p2ma;
-    
+    mem_event_request_t *req;
+
     /* First, handle rx2rw conversion automatically */
     gfn_lock(p2m, gfn, 0);
     mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, p2m_query, NULL);
@@ -1159,7 +1160,7 @@ bool_t p2m_mem_access_check(unsigned lon
     gfn_unlock(p2m, gfn, 0);
 
     /* Otherwise, check if there is a memory event listener, and send the message along */
-    if ( mem_event_claim_slot(d, &d->mem_event->access) == -ENOSYS )
+    if ( !mem_event_check_ring(&d->mem_event->access) || !req_ptr ) 
     {
         /* No listener */
         if ( p2m->access_required ) 
@@ -1183,29 +1184,34 @@ bool_t p2m_mem_access_check(unsigned lon
         }
     }
 
-    memset(&req, 0, sizeof(req));
-    req.type = MEM_EVENT_TYPE_ACCESS;
-    req.reason = MEM_EVENT_REASON_VIOLATION;
+    *req_ptr = NULL;
+    req = xmalloc(mem_event_request_t);
+    if ( req )
+    {
+        *req_ptr = req;
+        memset(req, 0, sizeof(*req));
+        req->type = MEM_EVENT_TYPE_ACCESS;
+        req->reason = MEM_EVENT_REASON_VIOLATION;
+
+        /* Pause the current VCPU */
+        if ( p2ma != p2m_access_n2rwx )
+            req->flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+
+        /* Send request to mem event */
+        req->gfn = gfn;
+        req->offset = gpa & ((1 << PAGE_SHIFT) - 1);
+        req->gla_valid = gla_valid;
+        req->gla = gla;
+        req->access_r = access_r;
+        req->access_w = access_w;
+        req->access_x = access_x;
+    
+        req->vcpu_id = v->vcpu_id;
+    }
 
     /* Pause the current VCPU */
     if ( p2ma != p2m_access_n2rwx )
-    {
         vcpu_pause_nosync(v);
-        req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
-    } 
-
-    /* Send request to mem event */
-    req.gfn = gfn;
-    req.offset = gpa & ((1 << PAGE_SHIFT) - 1);
-    req.gla_valid = gla_valid;
-    req.gla = gla;
-    req.access_r = access_r;
-    req.access_w = access_w;
-    req.access_x = access_x;
-    
-    req.vcpu_id = v->vcpu_id;
-
-    mem_event_put_request(d, &d->mem_event->access, &req);
 
     /* VCPU may be paused, return whether we promoted automatically */
     return (p2ma == p2m_access_n2rwx);
diff -r 5416a3b371f2 -r f2d967332acc xen/common/grant_table.c
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -164,8 +164,8 @@ static int __get_paged_frame(unsigned lo
         *frame = mfn_x(mfn);
         if ( p2m_is_paging(p2mt) )
         {
+            put_gfn(rd, gfn);
             p2m_mem_paging_populate(rd, gfn);
-            put_gfn(rd, gfn);
             rc = GNTST_eagain;
         }
     } else {
diff -r 5416a3b371f2 -r f2d967332acc xen/common/memory.c
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -166,8 +166,8 @@ int guest_remove_page(struct domain *d, 
     if ( unlikely(p2m_is_paging(p2mt)) )
     {
         guest_physmap_remove_page(d, gmfn, mfn, 0);
+        put_gfn(d, gmfn);
         p2m_mem_paging_drop_page(d, gmfn, p2mt);
-        put_gfn(d, gmfn);
         return 1;
     }
 #else
diff -r 5416a3b371f2 -r f2d967332acc xen/include/asm-x86/mem_access.h
--- a/xen/include/asm-x86/mem_access.h
+++ b/xen/include/asm-x86/mem_access.h
@@ -23,6 +23,7 @@
 
 int mem_access_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
                       XEN_GUEST_HANDLE(void) u_domctl);
+int mem_access_send_req(struct domain *d, mem_event_request_t *req);
 
 
 /*
diff -r 5416a3b371f2 -r f2d967332acc xen/include/asm-x86/mem_event.h
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -24,6 +24,9 @@
 #ifndef __MEM_EVENT_H__
 #define __MEM_EVENT_H__
 
+/* Returns whether a ring has been set up */
+bool_t mem_event_check_ring(struct mem_event_domain *med);
+
 /* Returns 0 on success, -ENOSYS if there is no ring, -EBUSY if there is no
  * available space. For success or -EBUSY, the vCPU may be left blocked
  * temporarily to ensure that the ring does not lose future events.  In
diff -r 5416a3b371f2 -r f2d967332acc xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -494,9 +494,12 @@ static inline void p2m_mem_paging_popula
 #ifdef __x86_64__
 /* Send mem event based on the access (gla is -1ull if not available).  Handles
  * the rw2rx conversion. Boolean return value indicates if access rights have 
- * been promoted with no underlying vcpu pause. */
+ * been promoted with no underlying vcpu pause. If the req_ptr has been populated,
+ * then the caller must put the event in the ring (once having released get_gfn*
+ * locks) -- the caller must also xfree the request. */
 bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
-                          bool_t access_r, bool_t access_w, bool_t access_x);
+                          bool_t access_r, bool_t access_w, bool_t access_x,
+                          mem_event_request_t **req_ptr);
 /* Resumes the running of the VCPU, restarting the last instruction */
 void p2m_mem_access_resume(struct domain *d);
 
@@ -513,7 +516,8 @@ int p2m_get_mem_access(struct domain *d,
 #else
 static inline bool_t p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, 
                                         unsigned long gla, bool_t access_r, 
-                                        bool_t access_w, bool_t access_x)
+                                        bool_t access_w, bool_t access_x,
+                                        mem_event_request_t **req_ptr)
 { return 1; }
 static inline int p2m_set_mem_access(struct domain *d, 
                                      unsigned long start_pfn, 

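The mem_access path cannot simply be re-ordered, because the check itself runs
under the gfn lock; instead the request is handed back to the caller and sent
once the lock has been dropped. A rough sketch of the resulting calling
convention, using the same names as the hvm.c hunk above (illustration only):

    mem_event_request_t *req_ptr = NULL;

    /* Under the gfn lock: may allocate and fill a request via *req_ptr. */
    p2m_mem_access_check(gpa, gla_valid, gla, access_r, access_w, access_x,
                         &req_ptr);

    put_gfn(p2m->domain, gfn);        /* release the gfn reference ...       */

    if ( req_ptr )                    /* ... then deliver the deferred event */
    {
        mem_access_send_req(v->domain, req_ptr);  /* may sleep on a full ring */
        xfree(req_ptr);
    }
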
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
