
[Xen-devel] [PATCH 2 of 5] Improve ring management for memory events. Do not lose guest events



 xen/arch/x86/hvm/hvm.c          |   20 ++-
 xen/arch/x86/mm/mem_event.c     |  205 ++++++++++++++++++++++++++++++---------
 xen/arch/x86/mm/mem_sharing.c   |   27 +++-
 xen/arch/x86/mm/p2m.c           |  104 ++++++++++---------
 xen/common/memory.c             |    7 +-
 xen/include/asm-x86/mem_event.h |   16 ++-
 xen/include/asm-x86/p2m.h       |    6 +-
 xen/include/xen/mm.h            |    2 +
 xen/include/xen/sched.h         |    5 +-
 9 files changed, 268 insertions(+), 124 deletions(-)


The mem_event code currently has a mechanism for reserving space in the ring
before putting an event, but each caller must individually ensure that the
vCPUs are correctly paused if no space is available.

This patch fixes that issue by reversing the semantics: we ensure that enough
space is always left for one event per vCPU in the ring.  If, after putting the
current request, this constraint would be violated the next time the current
vCPU puts a request in the ring, we pause that vCPU.
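
The invariant can be summarised with a small standalone model (illustrative
only, not the Xen code; the struct and helper names are placeholders):

    struct ring_model {
        int free_slots;   /* request slots currently free in the ring */
        int max_vcpus;    /* vCPUs that may each still need one slot  */
    };

    /* Returns 1 if the posting guest vCPU must be paused afterwards. */
    static int must_pause_after_put(struct ring_model *r)
    {
        r->free_slots--;                      /* the request just posted */
        return r->free_slots < r->max_vcpus;  /* invariant now at risk   */
    }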

For mem events caused by foreign vcpus, we simply drop the event if the
space constraint would be violated.  Foreign vcpus are expected to retry their
mapping operations (and thus the associated paging populate mem events
will be re-issued).
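
Continuing the toy model above, the foreign-vs-guest policy looks roughly like
this (the arithmetic and names are illustrative; the real checks live in
mem_event_put_request() below):

    #include <errno.h>

    /* 'blocked' counts guest vCPUs already paused on this ring; slots must
     * stay reserved for the guest vCPUs that are not yet blocked. */
    static int model_put_request(struct ring_model *r, int foreign, int blocked)
    {
        if ( foreign && r->free_slots <= (r->max_vcpus - blocked) )
            return -EAGAIN;      /* drop it; the foreign mapper retries */

        r->free_slots--;         /* event accepted */
        if ( !foreign && r->free_slots < r->max_vcpus )
            return -EBUSY;       /* guest vCPU paused until space frees up */
        return 0;
    }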

Finally, guests can cause an unbounded number of paging drop events via the
balloon -> decrease_reservation hypercall.  We therefore notify the hypercall
when there is no more space in the ring, so that a continuation can be
scheduled.
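
The caller-side pattern this implies is sketched below; do_drop_loop and the
returned page index are illustrative stand-ins for the real
decrease_reservation continuation plumbing:

    /* Stop early when a drop event reports ring congestion, returning the
     * index to resume from so the hypercall can be continued later. */
    static long do_drop_loop(struct ring_model *r, unsigned long start,
                             unsigned long nr_pages)
    {
        unsigned long i;

        for ( i = start; i < nr_pages; i++ )
        {
            if ( model_put_request(r, 0 /* guest vCPU */, 0) == -EBUSY )
                return (long)i;   /* preemption point: resume from page i */
        }
        return 0;                 /* all drop events delivered */
    }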

With all these mechanisms, no guest events are lost and there is no need
for wait queues.  Our testing includes (1) ballooning down 512 MiB, and (2) a
mem event mode in which every page access in a four-vCPU guest results in
an event, with no vCPU pausing, while the four vCPUs touch all of RAM.  No
guest events were lost in either case, and qemu-dm had no mapping problems.

Additionally, we ensure that no events are lost by Xen as a consumer if
multiple responses land on the ring for a single domctl.  While the current
domctl-based notifications to Xen disallow batching, this is required by later
patches in this series.
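
For reference, the consumer-side drain loop this refers to is the one used by
the resume paths in the patch below:

    while ( mem_event_get_response(d, &d->mem_paging, &rsp) )
    {
        if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
    }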

Finally, mark_and_pause_vcpus was misleading, because it did not strictly
pause the vcpus but rather sent them to sleep.  Change it to an actual
vcpu_pause, which protects wakeups via an atomic count.  This matters when an
event pauses a vcpu and the same vcpu is also marked and paused due to ring
congestion.
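
A minimal standalone illustration of why the counted pause matters (not Xen
code; vcpu_model is a placeholder): a vCPU paused both by an event and by ring
congestion becomes runnable again only after both sources have unpaused it.

    struct vcpu_model { int pause_count; };

    static void model_pause(struct vcpu_model *v)   { v->pause_count++; }
    static void model_unpause(struct vcpu_model *v) { v->pause_count--; }
    /* The vCPU is runnable only while pause_count == 0. */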

Signed-off-by: Adin Scannell <adin@xxxxxxxxxxx>
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>

diff -r 8d67f4d5bafa -r 43dc614d543c xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4108,7 +4108,6 @@ static int hvm_memory_event_traps(long p
     struct vcpu* v = current;
     struct domain *d = v->domain;
     mem_event_request_t req;
-    int rc;
 
     if ( !(p & HVMPME_MODE_MASK) ) 
         return 0;
@@ -4116,10 +4115,6 @@ static int hvm_memory_event_traps(long p
     if ( (p & HVMPME_onchangeonly) && (value == old) )
         return 1;
     
-    rc = mem_event_check_ring(d, &d->mem_access);
-    if ( rc )
-        return rc;
-    
     memset(&req, 0, sizeof(req));
     req.type = MEM_EVENT_TYPE_ACCESS;
     req.reason = reason;
@@ -4139,7 +4134,20 @@ static int hvm_memory_event_traps(long p
         req.gla_valid = 1;
     }
     
-    mem_event_put_request(d, &d->mem_access, &req);
+    if ( mem_event_put_request(d, &d->mem_access, &req) == -ENOSYS )
+    {
+        /* rc == -ENOSYS
+         * If there was no ring to send the event, then simply unpause the
+         * vcpu (if we had previously paused it). It will retry the 
+         * instruction (with the exception "handled") and go on */
+        if ( req.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) 
+            vcpu_unpause(v);
+        /* rc == -EBUSY
+         * If the ring is full, the vcpu has been marked and paused,
+         *  and the exception has been communicated to the consumer.
+         * Once there is room in the ring, the vcpu will be woken up 
+         * and will retry. */
+    }
     
     return 1;
 }
diff -r 8d67f4d5bafa -r 43dc614d543c xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -91,6 +91,9 @@ static int mem_event_enable(struct domai
     put_gfn(dom_mem_event, ring_gfn);
     put_gfn(dom_mem_event, shared_gfn); 
 
+    /* Set the number of currently blocked vCPUs to 0. */
+    med->blocked = 0;
+
     /* Allocate event channel */
     rc = alloc_unbound_xen_event_channel(d->vcpu[0],
                                          current->domain->domain_id);
@@ -108,7 +111,7 @@ static int mem_event_enable(struct domai
     mem_event_ring_lock_init(med);
 
     /* Wake any VCPUs paused for memory events */
-    mem_event_unpause_vcpus(d);
+    mem_event_unpause_vcpus(d, med);
 
     return 0;
 
@@ -122,8 +125,57 @@ static int mem_event_enable(struct domai
     return rc;
 }
 
-static int mem_event_disable(struct mem_event_domain *med)
+static void __mem_event_unpause_vcpus(struct domain *d, 
+                                        struct mem_event_domain *med, int free)
 {
+    struct vcpu *v;
+    int i, j, k;
+    int online = d->max_vcpus;
+
+    /*
+     * We only allow vCPUs to run if there are enough free slots in the ring
+     * for their memory events to be processed.  This ensures that no memory
+     * events are lost: certain types of events cannot be replayed, so there
+     * must be space in the ring for them when they are hit.
+     * See the large comment below in mem_event_put_request().
+     */
+    for_each_vcpu ( d, v )
+        if ( test_bit(_VPF_mem_event, &v->pause_flags) )
+            online--;
+
+    /* We remember which vcpu last woke up, to avoid always scanning linearly
+     * from zero and starving higher-numbered vcpus under high load */
+    if ( d->vcpu )
+    {
+        for (i = med->last_vcpu_wake_up + 1, j = 0; j < d->max_vcpus; i++, j++)
+        {
+            k = i % d->max_vcpus;
+            v = d->vcpu[k];
+            if ( !v ) continue;
+
+            if ( !(med->blocked) || online >= free )
+                break;
+            if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
+            {
+                vcpu_unpause(v);
+                online++;
+                med->blocked--;
+                med->last_vcpu_wake_up = k;
+            }
+        }
+    }
+}
+
+static int mem_event_disable(struct domain *d, struct mem_event_domain *med)
+{
+    if ( !med )
+        return 0;
+
+    /* Wake up everybody, regardless */
+    med->last_vcpu_wake_up = 0;
+    __mem_event_unpause_vcpus(d, med, d->max_vcpus);
+
     unmap_domain_page(med->ring_page);
     med->ring_page = NULL;
 
@@ -133,31 +185,95 @@ static int mem_event_disable(struct mem_
     return 0;
 }
 
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
+static inline int mem_event_ring_free(struct domain *d, struct mem_event_domain *med)
 {
+    int free_requests;
+
+    free_requests = RING_FREE_REQUESTS(&med->front_ring);
+    if ( unlikely(free_requests < d->max_vcpus) )
+    {
+        /* This may happen during normal operation (hopefully not often). */
+        gdprintk(XENLOG_INFO, "mem_event request slots for domain %d: %d\n",
+                               d->domain_id, free_requests);
+    }
+
+    return free_requests;
+}
+
+/* Return values
+ * zero: success
+ * -ENOSYS: no ring
+ * -EAGAIN: ring is full and the event has not been transmitted.
+ *          Only foreign vcpus get -EAGAIN
+ * -EBUSY: guest vcpu has been paused due to ring congestion
+ */ 
+int mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
+{
+    int ret = 0;
+    int foreign = (d != current->domain);
     mem_event_front_ring_t *front_ring;
     RING_IDX req_prod;
 
+    if ( mem_event_check_ring(d, med) )
+        return -ENOSYS;
+
     mem_event_ring_lock(med);
 
-    front_ring = &med->front_ring;
-    req_prod = front_ring->req_prod_pvt;
+    if ( foreign &&
+        (mem_event_ring_free(d, med) <= (d->max_vcpus - med->blocked)) )
+    {
+        /* This is an event caused by a foreign mapper. Putting the event
+         * in the ring could preclude a guest vcpu from putting an event.
+         * The foreign mapper has to retry. */
+        gdprintk(XENLOG_DEBUG, "Foreign-caused (%u) event will not be put"
+                 " in ring with %d slots %d sleepers\n", current->domain->domain_id,
+                 mem_event_ring_free(d, med), med->blocked);
+        mem_event_ring_unlock(med);
+        return -EAGAIN;
+    }
 
-    /* Copy request */
-    memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
-    req_prod++;
+    if ( mem_event_ring_free(d, med) == 0 )
+    {
+        /* This should *never* happen */
+        gdprintk(XENLOG_WARNING, "possible lost mem_event for domain %d\n",
+                                 d->domain_id);
+        ret = -EBUSY;
+    }
+    else
+    {
+        front_ring = &med->front_ring;
+        req_prod = front_ring->req_prod_pvt;
 
-    /* Update ring */
-    med->req_producers--;
-    front_ring->req_prod_pvt = req_prod;
-    RING_PUSH_REQUESTS(front_ring);
+        /* Copy request */
+        memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
+        req_prod++;
+
+        /* Update ring */
+        front_ring->req_prod_pvt = req_prod;
+        RING_PUSH_REQUESTS(front_ring);
+    }
+
+    /*
+     * We ensure that each vcpu can put at least *one* event -- because some
+     * events are not repeatable, such as dropping a page.  This will ensure no
+     * vCPU is left with an event that it must place on the ring but cannot;
+     * it will be paused after the event is placed.
+     * See large comment below in mem_event_unpause_vcpus().
+     */
+    if ( !foreign && mem_event_ring_free(d, med) < d->max_vcpus )
+    {
+        mem_event_mark_and_pause(current, med);
+        ret = -EBUSY;
+    }
 
     mem_event_ring_unlock(med);
 
     notify_via_xen_event_channel(d, med->xen_port);
+
+    return ret;
 }
 
-void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp)
+int mem_event_get_response(struct domain *d, struct mem_event_domain *med, mem_event_response_t *rsp)
 {
     mem_event_front_ring_t *front_ring;
     RING_IDX rsp_cons;
@@ -167,6 +283,12 @@ void mem_event_get_response(struct mem_e
     front_ring = &med->front_ring;
     rsp_cons = front_ring->rsp_cons;
 
+    if ( !RING_HAS_UNCONSUMED_RESPONSES(front_ring) )
+    {
+        mem_event_ring_unlock(med);
+        return 0;
+    }
+
     /* Copy response */
     memcpy(rsp, RING_GET_RESPONSE(front_ring, rsp_cons), sizeof(*rsp));
     rsp_cons++;
@@ -176,54 +298,35 @@ void mem_event_get_response(struct mem_e
     front_ring->sring->rsp_event = rsp_cons + 1;
 
     mem_event_ring_unlock(med);
+
+    return 1;
 }
 
-void mem_event_unpause_vcpus(struct domain *d)
+void mem_event_unpause_vcpus(struct domain *d, struct mem_event_domain *med)
 {
-    struct vcpu *v;
+    mem_event_ring_lock(med);
 
-    for_each_vcpu ( d, v )
-        if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
-            vcpu_wake(v);
+    if ( med->blocked )
+        __mem_event_unpause_vcpus(d, med, mem_event_ring_free(d, med));
+
+    mem_event_ring_unlock(med);
 }
 
-void mem_event_mark_and_pause(struct vcpu *v)
+void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med)
 {
-    set_bit(_VPF_mem_event, &v->pause_flags);
-    vcpu_sleep_nosync(v);
-}
-
-void mem_event_put_req_producers(struct mem_event_domain *med)
-{
-    mem_event_ring_lock(med);
-    med->req_producers--;
-    mem_event_ring_unlock(med);
+    if ( !test_and_set_bit(_VPF_mem_event, &v->pause_flags) )
+    {
+        vcpu_pause_nosync(v);
+        med->blocked++;
+    }
 }
 
 int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
 {
-    struct vcpu *curr = current;
-    int free_requests;
-    int ring_full = 1;
+    if ( !med->ring_page )
+        return -ENOSYS;
 
-    if ( !med->ring_page )
-        return -1;
-
-    mem_event_ring_lock(med);
-
-    free_requests = RING_FREE_REQUESTS(&med->front_ring);
-    if ( med->req_producers < free_requests )
-    {
-        med->req_producers++;
-        ring_full = 0;
-    }
-
-    if ( ring_full && (curr->domain == d) )
-        mem_event_mark_and_pause(curr);
-
-    mem_event_ring_unlock(med);
-
-    return ring_full;
+    return 0;
 }
 
 int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
@@ -294,7 +397,7 @@ int mem_event_domctl(struct domain *d, x
         case XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE:
         {
             if ( med->ring_page )
-                rc = mem_event_disable(med);
+                rc = mem_event_disable(d, med);
         }
         break;
 
@@ -333,7 +436,7 @@ int mem_event_domctl(struct domain *d, x
         case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
         {
             if ( med->ring_page )
-                rc = mem_event_disable(&d->mem_access);
+                rc = mem_event_disable(d, &d->mem_access);
         }
         break;
 
diff -r 8d67f4d5bafa -r 43dc614d543c xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -281,12 +281,19 @@ static struct page_info* mem_sharing_all
     vcpu_pause_nosync(v);
     req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
 
-    if(mem_event_check_ring(d, &d->mem_share)) return page;
-
     req.gfn = gfn;
     req.p2mt = p2m_ram_shared;
     req.vcpu_id = v->vcpu_id;
-    mem_event_put_request(d, &d->mem_share, &req);
+
+    /* If there is no ring, and we're out of memory, then
+     * there is no way out. */
+    if ( mem_event_put_request(d, &d->mem_share, &req) == -ENOSYS )
+    {
+        gdprintk(XENLOG_ERR, 
+                 "Failed alloc on unshare path for %hu and no ring "
+                 "to upcall\n", d->domain_id);
+        domain_crash(d);
+    }
 
     return page;
 }
@@ -300,12 +307,16 @@ int mem_sharing_sharing_resume(struct do
 {
     mem_event_response_t rsp;
 
-    /* Get request off the ring */
-    mem_event_get_response(&d->mem_share, &rsp);
+    /* Get all requests off the ring */
+    while ( mem_event_get_response(d, &d->mem_share, &rsp) )
+    {
+        /* Unpause domain/vcpu */
+        if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
+            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+    }
 
-    /* Unpause domain/vcpu */
-    if( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-        vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+    /* Unpause any domains that were paused because the ring was full */
+    mem_event_unpause_vcpus(d, &d->mem_share);
 
     return 0;
 }
diff -r 8d67f4d5bafa -r 43dc614d543c xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -871,13 +871,15 @@ int p2m_mem_paging_evict(struct domain *
  * p2m_mem_paging_drop_page() will notify the pager that a paged-out gfn was
  * released by the guest. The pager is supposed to drop its reference of the
  * gfn.
+ * Returns zero on success, EAGAIN for foreign mappers and EBUSY for guest 
+ * vcpus if the ring is congested.
  */
-void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
+int p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
 {
     struct vcpu *v = current;
     mem_event_request_t req;
 
-    /* Check that there's space on the ring for this request */
+    /* Check that there's a paging listener who cares about this */
     if ( mem_event_check_ring(d, &d->mem_paging) == 0)
     {
         /* Send release notification to pager */
@@ -886,8 +888,11 @@ void p2m_mem_paging_drop_page(struct dom
         req.gfn = gfn;
         req.vcpu_id = v->vcpu_id;
 
-        mem_event_put_request(d, &d->mem_paging, &req);
+        /* rc cannot be ENOSYS, as we checked before */
+        return mem_event_put_request(d, &d->mem_paging, &req);
     }
+
+    return 0;
 }
 
 /**
@@ -920,9 +925,14 @@ void p2m_mem_paging_populate(struct doma
     mfn_t mfn;
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
-    /* Check that there's space on the ring for this request */
+    /* We're paging. There should be a ring */
     if ( mem_event_check_ring(d, &d->mem_paging) )
+    {
+        gdprintk(XENLOG_ERR, "Domain %hu paging gfn %lx yet no ring "
+                    "in place\n", d->domain_id, gfn);
+        domain_crash(d);
         return;
+    }
 
     memset(&req, 0, sizeof(req));
     req.type = MEM_EVENT_TYPE_PAGING;
@@ -951,7 +961,6 @@ void p2m_mem_paging_populate(struct doma
     else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
     {
         /* gfn is already on its way back and vcpu is not paused */
-        mem_event_put_req_producers(&d->mem_paging);
         return;
     }
 
@@ -960,7 +969,10 @@ void p2m_mem_paging_populate(struct doma
     req.p2mt = p2mt;
     req.vcpu_id = v->vcpu_id;
 
-    mem_event_put_request(d, &d->mem_paging, &req);
+    (void)mem_event_put_request(d, &d->mem_paging, &req);
+    /* If the ring is full, foreign mappers will retry, and guest
+     * vcpus remain paused. Guest vcpu will wake up and retry once
+     * the consumer has made some space in the ring. */
 }
 
 /**
@@ -1084,33 +1096,34 @@ void p2m_mem_paging_resume(struct domain
     p2m_access_t a;
     mfn_t mfn;
 
-    /* Pull the response off the ring */
-    mem_event_get_response(&d->mem_paging, &rsp);
+    /* Pull all responses off the ring */
+    while( mem_event_get_response(d, &d->mem_paging, &rsp) )
+    {
+        /* Fix p2m entry if the page was not dropped */
+        if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
+        {
+            p2m_lock(p2m);
+            mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query, NULL);
+            /* Allow only pages which were prepared properly, or pages which
+             * were nominated but not evicted */
+            if ( mfn_valid(mfn) && 
+                 (p2mt == p2m_ram_paging_in || p2mt == p2m_ram_paging_in_start) )
+            {
+                set_p2m_entry(p2m, rsp.gfn, mfn, PAGE_ORDER_4K, 
+                                paging_mode_log_dirty(d) ? p2m_ram_logdirty : 
+                                p2m_ram_rw, a);
+                set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
+            }
+            p2m_unlock(p2m);
+        }
 
-    /* Fix p2m entry if the page was not dropped */
-    if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
-    {
-        p2m_lock(p2m);
-        mfn = p2m->get_entry(p2m, rsp.gfn, &p2mt, &a, p2m_query, NULL);
-        /* Allow only pages which were prepared properly, or pages which
-         * were nominated but not evicted */
-        if ( mfn_valid(mfn) && 
-             (p2mt == p2m_ram_paging_in || p2mt == p2m_ram_paging_in_start) )
-        {
-            set_p2m_entry(p2m, rsp.gfn, mfn, PAGE_ORDER_4K, 
-                            paging_mode_log_dirty(d) ? p2m_ram_logdirty : p2m_ram_rw,
-                            a);
-            set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
-        }
-        p2m_unlock(p2m);
+        /* Unpause domain */
+        if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
+            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
     }
 
-    /* Unpause domain */
-    if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-        vcpu_unpause(d->vcpu[rsp.vcpu_id]);
-
     /* Unpause any domains that were paused because the ring was full */
-    mem_event_unpause_vcpus(d);
+    mem_event_unpause_vcpus(d, &d->mem_paging);
 }
 
 void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla, 
@@ -1121,7 +1134,6 @@ void p2m_mem_access_check(unsigned long 
     unsigned long gfn = gpa >> PAGE_SHIFT;
     struct domain *d = v->domain;    
     struct p2m_domain* p2m = p2m_get_hostp2m(d);
-    int res;
     mfn_t mfn;
     p2m_type_t p2mt;
     p2m_access_t p2ma;
@@ -1139,17 +1151,14 @@ void p2m_mem_access_check(unsigned long 
     p2m_unlock(p2m);
 
     /* Otherwise, check if there is a memory event listener, and send the message along */
-    res = mem_event_check_ring(d, &d->mem_access);
-    if ( res < 0 ) 
+    if ( mem_event_check_ring(d, &d->mem_access) == -ENOSYS ) 
     {
         /* No listener */
         if ( p2m->access_required ) 
         {
-            printk(XENLOG_INFO 
-                   "Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n",
-                   v->vcpu_id, d->domain_id);
-
-            mem_event_mark_and_pause(v);
+            gdprintk(XENLOG_INFO, "Memory access permissions failure, no mem_event "
+                     "listener VCPU %d, dom %d\n", v->vcpu_id, d->domain_id);
+            domain_crash(v->domain);
         }
         else
         {
@@ -1161,8 +1170,6 @@ void p2m_mem_access_check(unsigned long 
 
         return;
     }
-    else if ( res > 0 )
-        return;  /* No space in buffer; VCPU paused */
 
     memset(&req, 0, sizeof(req));
     req.type = MEM_EVENT_TYPE_ACCESS;
@@ -1183,9 +1190,8 @@ void p2m_mem_access_check(unsigned long 
     
     req.vcpu_id = v->vcpu_id;
 
-    mem_event_put_request(d, &d->mem_access, &req);
-
-    /* VCPU paused, mem event request sent */
+    (void)mem_event_put_request(d, &d->mem_access, &req);
+    /* VCPU paused */
 }
 
 void p2m_mem_access_resume(struct p2m_domain *p2m)
@@ -1193,15 +1199,17 @@ void p2m_mem_access_resume(struct p2m_do
     struct domain *d = p2m->domain;
     mem_event_response_t rsp;
 
-    mem_event_get_response(&d->mem_access, &rsp);
-
-    /* Unpause domain */
-    if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
-        vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+    /* Pull all responses off the ring */
+    while( mem_event_get_response(d, &d->mem_access, &rsp) )
+    {
+        /* Unpause domain */
+        if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
+            vcpu_unpause(d->vcpu[rsp.vcpu_id]);
+    }
 
     /* Unpause any domains that were paused because the ring was full or no listener
      * was available */
-    mem_event_unpause_vcpus(d);
+    mem_event_unpause_vcpus(d, &d->mem_access);
 }
 
 
diff -r 8d67f4d5bafa -r 43dc614d543c xen/common/memory.c
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -165,10 +165,13 @@ int guest_remove_page(struct domain *d, 
     mfn = mfn_x(get_gfn(d, gmfn, &p2mt)); 
     if ( unlikely(p2m_is_paging(p2mt)) )
     {
+        int rc; 
         guest_physmap_remove_page(d, gmfn, mfn, 0);
-        p2m_mem_paging_drop_page(d, gmfn);
+        rc = p2m_mem_paging_drop_page(d, gmfn);
         put_gfn(d, gmfn);
-        return 1;
+        /* drop_page returns zero on success, we return 1 on success.
+         * drop_page returns negative on error, never returns 1. */
+        return rc ? rc : 1;
     }
 #else
     mfn = gmfn_to_mfn(d, gmfn);
diff -r 8d67f4d5bafa -r 43dc614d543c xen/include/asm-x86/mem_event.h
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -25,12 +25,18 @@
 #define __MEM_EVENT_H__
 
 /* Pauses VCPU while marking pause flag for mem event */
-void mem_event_mark_and_pause(struct vcpu *v);
+void mem_event_mark_and_pause(struct vcpu *v, struct mem_event_domain *med);
 int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
-void mem_event_put_req_producers(struct mem_event_domain *med);
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req);
-void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp);
-void mem_event_unpause_vcpus(struct domain *d);
+void mem_event_unpause_vcpus(struct domain *d, struct mem_event_domain *med);
+
+/* Returns 0 on success, -ENOSYS if there is no ring, -EBUSY if there is
+ * a ring but no space.  For success or -EBUSY, the vCPU is left blocked to
+ * ensure that the ring does not lose events.  In general, put_request should
+ * not fail, as it attempts to ensure that each vCPU can put at least one req. */
+int mem_event_put_request(struct domain *d, struct mem_event_domain *med,
+                            mem_event_request_t *req);
+int mem_event_get_response(struct domain *d, struct mem_event_domain *med,
+                            mem_event_response_t *rsp);
 
 int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
                      XEN_GUEST_HANDLE(void) u_domctl);
diff -r 8d67f4d5bafa -r 43dc614d543c xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -475,7 +475,7 @@ int p2m_mem_paging_nominate(struct domai
 /* Evict a frame */
 int p2m_mem_paging_evict(struct domain *d, unsigned long gfn);
 /* Tell xenpaging to drop a paged out frame */
-void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn);
+int p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn);
 /* Start populating a paged out frame */
 void p2m_mem_paging_populate(struct domain *d, unsigned long gfn);
 /* Prepare the p2m for paging a frame in */
@@ -483,8 +483,8 @@ int p2m_mem_paging_prep(struct domain *d
 /* Resume normal operation (in case a domain was paused) */
 void p2m_mem_paging_resume(struct domain *d);
 #else
-static inline void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
-{ }
+static inline int p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
+{ return 0; }
 static inline void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
 { }
 #endif
diff -r 8d67f4d5bafa -r 43dc614d543c xen/include/xen/mm.h
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -318,6 +318,8 @@ page_list_splice(struct page_list_head *
 
 void scrub_one_page(struct page_info *);
 
+/* Returns 1 on success, 0 on error, negative if the ring
+ * for event propagation is full in the presence of paging */
 int guest_remove_page(struct domain *d, unsigned long gmfn);
 
 #define RAM_TYPE_CONVENTIONAL 0x00000001
diff -r 8d67f4d5bafa -r 43dc614d543c xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -183,13 +183,16 @@ struct mem_event_domain
 {
     /* ring lock */
     spinlock_t ring_lock;
-    unsigned int req_producers;
     /* shared page */
     mem_event_shared_page_t *shared_page;
     /* shared ring page */
     void *ring_page;
     /* front-end ring */
     mem_event_front_ring_t front_ring;
+    /* the number of vCPUs blocked */
+    unsigned int blocked;
+    /* The last vcpu woken up */
+    unsigned int last_vcpu_wake_up;
     /* event channel port (vcpu0 only) */
     int xen_port;
 };
